Chat template: update for processor (#35953)

* update * we need batched nested input to always process correctly * update a bit * fix copies
2025-02-10 09:52:19 +01:00
parent 5bd7694781
commit eebd2c972c
21 changed files with 966 additions and 111 deletions
--- a/tests/models/llava/test_processor_llava.py
+++ b/tests/models/llava/test_processor_llava.py
@@ -17,7 +17,7 @@ import tempfile
 import unittest

 from transformers import AutoProcessor, AutoTokenizer, LlamaTokenizerFast, LlavaProcessor
-from transformers.testing_utils import require_torch, require_vision
+from transformers.testing_utils import require_vision
 from transformers.utils import is_torch_available, is_vision_available

 from ...test_processing_common import ProcessorTesterMixin
@@ -27,7 +27,7 @@ if is_vision_available():
    from transformers import CLIPImageProcessor

 if is_torch_available:
-    import torch
+    pass


@require_vision
@@ -53,7 +53,11 @@ class LlavaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        shutil.rmtree(self.tmpdirname)

    def prepare_processor_dict(self):
-        return {"chat_template": "dummy_template", "patch_size": 3, "vision_feature_select_strategy": "default"}
+        return {
+            "chat_template": "{% for message in messages %}{% if message['role'] != 'system' %}{{ message['role'].upper() + ': '}}{% endif %}{# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% if message['role'] != 'assistant' %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + ' '}}{% endfor %}{% else %}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{% generation %}{{ content['text'] + ' '}}{% endgeneration %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}",
+            "patch_size": 3,
+            "vision_feature_select_strategy": "default"
+        }  # fmt: skip

    @unittest.skip(
        "Skip because the model has no processor kwargs except for chat template and"
@@ -123,29 +127,6 @@ class LlavaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        )
        self.assertListEqual(list(out_dict_with_image.keys()), ["input_ids", "attention_mask", "pixel_values"])

-    @require_torch
-    def test_chat_template_dict_torch(self):
-        processor = LlavaProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image", "url": "https://www.ilankelman.org/stopsigns/australia.jpg"},
-                    {"type": "text", "text": "What is shown in this image?"},
-                ],
-            },
-        ]
-
-        out_dict_tensors = processor.apply_chat_template(
-            messages,
-            add_generation_prompt=True,
-            tokenize=True,
-            return_dict=True,
-            return_tensors="pt",
-        )
-        self.assertListEqual(list(out_dict_tensors.keys()), ["input_ids", "attention_mask", "pixel_values"])
-        self.assertTrue(isinstance(out_dict_tensors["input_ids"], torch.Tensor))
-
    def test_chat_template_with_continue_final_message(self):
        processor = LlavaProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
        expected_prompt = "USER: <image>\nDescribe this image. ASSISTANT: There is a dog and"