[chat-template] Unify tests and clean up 🧼 (#37275)

* fix tests and some clean up

* make one general test for each modality

* remove redundant merging of kwargs

* edge cases

* don't enforce slow when reloading

* fix gemma3 tests

* has to adapt llama 4 after rebase

* also remove from overridden tests

* should be green now
This commit is contained in:
Raushan Turganbay
2025-04-10 14:42:32 +02:00
committed by GitHub
parent 10144ff116
commit 1ae8d54b04
18 changed files with 389 additions and 1112 deletions

View File

@@ -62,77 +62,6 @@ class Mistral3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDown(self):
    """Delete the temporary directory referenced by ``self.tmpdirname``."""
    scratch_dir = self.tmpdirname
    shutil.rmtree(scratch_dir)
def test_chat_template_accepts_processing_kwargs(self):
    """Check that ``apply_chat_template`` honours tokenizer and image-processing kwargs.

    Three things are verified, in order:
      1. ``padding="max_length"`` with ``max_length=50`` yields 50 tokens.
      2. ``truncation=True`` with ``max_length=5`` yields 5 tokens.
      3. With ``return_dict=True`` and a negative ``rescale_factor``, the mean of
         the first image entry under ``self.images_input_name`` is <= 0.
    """
    # Override: load with the slow image processor so that numpy arrays are returned.
    processor = self.processor_class.from_pretrained(self.tmpdirname, use_fast=False)
    if processor.chat_template is None:
        self.skipTest("Processor has no chat template")

    # Keep a handle on the content list: the image entry is appended to it later,
    # and the change must be visible through `messages`.
    user_content = [
        {"type": "text", "text": "What is shown in this image?"},
    ]
    messages = [[{"role": "user", "content": user_content}]]

    # 1. Padding up to a fixed length.
    padded_ids = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        padding="max_length",
        truncation=True,
        max_length=50,
    )
    self.assertEqual(len(padded_ids[0]), 50)

    # 2. Truncation down to a fixed length.
    truncated_ids = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        truncation=True,
        max_length=5,
    )
    self.assertEqual(len(truncated_ids[0]), 5)

    # 3. Now test the ability to return dict, forwarding image-processing kwargs.
    image_entry = {"type": "image", "url": "https://www.ilankelman.org/stopsigns/australia.jpg"}
    user_content.append(image_entry)
    out_dict = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        do_rescale=True,
        rescale_factor=-1,
        return_tensors="np",
    )
    # A rescale factor of -1 is expected to leave no positive mean in the pixel values.
    first_image = out_dict[self.images_input_name][0][0]
    self.assertLessEqual(first_image.mean(), 0)
def test_chat_template(self):
    """Render a system + user (image and text) conversation and compare to the expected prompt."""
    processor = self.processor_class.from_pretrained(self.tmpdirname, use_fast=False)

    # Empty system prompt followed by a user turn holding one image and one text part.
    system_turn = {"role": "system", "content": ""}
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "What is shown in this image?"},
        ],
    }
    messages = [system_turn, user_turn]

    expected_prompt = "<s>[SYSTEM_PROMPT][/SYSTEM_PROMPT][INST][IMG]What is shown in this image?[/INST]"
    formatted_prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    self.assertEqual(expected_prompt, formatted_prompt)
def test_image_token_filling(self):
processor = self.processor_class.from_pretrained(self.tmpdirname)
# Important to check with non square image