[chat-template] Unify tests and clean up 🧼 (#37275)

* fix tests and some clean up * make one general test for each modality * remove redundant merging of kwargs * edge cases * don't enforce slow when reloading * fix gemma3 tests * has to adapt llama 4 after rebase * also remove from overridden tests * should be green now
2025-04-10 14:42:32 +02:00
parent 10144ff116
commit 1ae8d54b04
18 changed files with 389 additions and 1112 deletions
--- a/tests/models/mistral3/test_processor_mistral3.py
+++ b/tests/models/mistral3/test_processor_mistral3.py
@@ -62,77 +62,6 @@ class Mistral3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

-    def test_chat_template_accepts_processing_kwargs(self):
-        # override to use slow image processor to return numpy arrays
-        processor = self.processor_class.from_pretrained(self.tmpdirname, use_fast=False)
-        if processor.chat_template is None:
-            self.skipTest("Processor has no chat template")
-
-        messages = [
-            [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": "What is shown in this image?"},
-                    ],
-                },
-            ]
-        ]
-
-        formatted_prompt_tokenized = processor.apply_chat_template(
-            messages,
-            add_generation_prompt=True,
-            tokenize=True,
-            padding="max_length",
-            truncation=True,
-            max_length=50,
-        )
-        self.assertEqual(len(formatted_prompt_tokenized[0]), 50)
-
-        formatted_prompt_tokenized = processor.apply_chat_template(
-            messages,
-            add_generation_prompt=True,
-            tokenize=True,
-            truncation=True,
-            max_length=5,
-        )
-        self.assertEqual(len(formatted_prompt_tokenized[0]), 5)
-
-        # Now test the ability to return dict
-        messages[0][0]["content"].append(
-            {"type": "image", "url": "https://www.ilankelman.org/stopsigns/australia.jpg"}
-        )
-        out_dict = processor.apply_chat_template(
-            messages,
-            add_generation_prompt=True,
-            tokenize=True,
-            return_dict=True,
-            do_rescale=True,
-            rescale_factor=-1,
-            return_tensors="np",
-        )
-        self.assertLessEqual(out_dict[self.images_input_name][0][0].mean(), 0)
-
-    def test_chat_template(self):
-        processor = self.processor_class.from_pretrained(self.tmpdirname, use_fast=False)
-        expected_prompt = "<s>[SYSTEM_PROMPT][/SYSTEM_PROMPT][INST][IMG]What is shown in this image?[/INST]"
-
-        messages = [
-            {
-                "role": "system",
-                "content": "",
-            },
-            {
-                "role": "user",
-                "content": [
-                    {"type": "image"},
-                    {"type": "text", "text": "What is shown in this image?"},
-                ],
-            },
-        ]
-        formatted_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
-        self.assertEqual(expected_prompt, formatted_prompt)
-
    def test_image_token_filling(self):
        processor = self.processor_class.from_pretrained(self.tmpdirname)
        # Important to check with non square image