Rework add-new-model-like with modular and make test filenames coherent (#39612)

* remove tf/flax

* fix

* style

* Update add_new_model_like.py

* work in progress

* continue

* more cleanup

* simplify and first final version

* fixes -> it works

* add linter checks

* Update add_new_model_like.py

* fix

* add modular conversion at the end

* Update add_new_model_like.py

* add video processor

* Update add_new_model_like.py

* Update add_new_model_like.py

* Update add_new_model_like.py

* fix

* Update image_processing_auto.py

* Update image_processing_auto.py

* fix post rebase

* start test filenames replacement

* rename all test_processor -> test_processing

* fix copied from

* add docstrings

* Update add_new_model_like.py

* fix regex

* improve wording

* Update add_new_model_like.py

* Update add_new_model_like.py

* Update add_new_model_like.py

* start adding test

* fix

* fix

* proper first test

* tests

* fix

* fix

* fix

* fix

* modular can be used from anywhere

* protect import

* fix

* Update add_new_model_like.py

* fix
This commit is contained in:
Cyril Vallez
2025-08-04 14:41:09 +02:00
committed by GitHub
parent 5fb5b6cfaf
commit 380b2a0317
87 changed files with 1438 additions and 2869 deletions

View File

@@ -95,7 +95,7 @@ class AriaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -80,7 +80,7 @@ class AyaVisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -75,7 +75,7 @@ class ChameleonProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def prepare_processor_dict():
return {"image_seq_length": 2} # fmt: skip
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -35,15 +35,15 @@ class ClvpProcessorTest(unittest.TestCase):
shutil.rmtree(self.tmpdirname)
gc.collect()
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.get_tokenizer with Whisper->Clvp
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_tokenizer with Whisper->Clvp
def get_tokenizer(self, **kwargs):
return ClvpTokenizer.from_pretrained(self.checkpoint, **kwargs)
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.get_feature_extractor with Whisper->Clvp
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_feature_extractor with Whisper->Clvp
def get_feature_extractor(self, **kwargs):
return ClvpFeatureExtractor.from_pretrained(self.checkpoint, **kwargs)
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_save_load_pretrained_default with Whisper->Clvp
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_save_load_pretrained_default with Whisper->Clvp
def test_save_load_pretrained_default(self):
tokenizer = self.get_tokenizer()
feature_extractor = self.get_feature_extractor()
@@ -59,7 +59,7 @@ class ClvpProcessorTest(unittest.TestCase):
self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor.to_json_string())
self.assertIsInstance(processor.feature_extractor, ClvpFeatureExtractor)
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->Clvp,processor(raw_speech->processor(raw_speech=raw_speech
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->Clvp,processor(raw_speech->processor(raw_speech=raw_speech
def test_feature_extractor(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
@@ -74,7 +74,7 @@ class ClvpProcessorTest(unittest.TestCase):
for key in input_feat_extract:
self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp
def test_tokenizer(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
@@ -90,7 +90,7 @@ class ClvpProcessorTest(unittest.TestCase):
for key in encoded_tok:
self.assertListEqual(encoded_tok[key], encoded_processor[key])
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp
def test_tokenizer_decode(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()

View File

@@ -54,7 +54,7 @@ class ColPaliProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -57,7 +57,7 @@ class ColQwen2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -91,7 +91,7 @@ class Emu3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
# For an image where pixels go from 0 to 255 the diff can be 1 due to some numerical precision errors when scaling and unscaling
self.assertTrue(np.abs(orig_image - unnormalized_images).max() >= 1)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -64,7 +64,7 @@ class FuyuProcessingTest(ProcessorTesterMixin, unittest.TestCase):
def get_image_processor(self, **kwargs):
return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).image_processor
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -58,7 +58,7 @@ class Gemma3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor.save_pretrained(cls.tmpdirname)
cls.image_token = processor.boi_token
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -94,17 +94,17 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
return [labels, labels_longer] + [labels] * (batch_size - 2)
@classmethod
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_tokenizer with CLIP->Bert
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_tokenizer with CLIP->Bert
def get_tokenizer(cls, **kwargs):
return BertTokenizer.from_pretrained(cls.tmpdirname, **kwargs)
@classmethod
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_rust_tokenizer with CLIP->Bert
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_rust_tokenizer with CLIP->Bert
def get_rust_tokenizer(cls, **kwargs):
return BertTokenizerFast.from_pretrained(cls.tmpdirname, **kwargs)
@classmethod
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_image_processor with CLIP->GroundingDino
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_image_processor with CLIP->GroundingDino
def get_image_processor(cls, **kwargs):
return GroundingDinoImageProcessor.from_pretrained(cls.tmpdirname, **kwargs)
@@ -145,7 +145,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
expected_box_slice = torch.tensor([0.6908, 0.4354, 1.0737, 1.3947])
torch.testing.assert_close(post_processed[0]["boxes"][0], expected_box_slice, rtol=1e-4, atol=1e-4)
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_save_load_pretrained_default with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_save_load_pretrained_default with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
def test_save_load_pretrained_default(self):
tokenizer_slow = self.get_tokenizer()
tokenizer_fast = self.get_rust_tokenizer()
@@ -171,7 +171,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertIsInstance(processor_slow.image_processor, GroundingDinoImageProcessor)
self.assertIsInstance(processor_fast.image_processor, GroundingDinoImageProcessor)
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_save_load_pretrained_additional_features with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_save_load_pretrained_additional_features with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
def test_save_load_pretrained_additional_features(self):
with tempfile.TemporaryDirectory() as tmpdir:
processor = GroundingDinoProcessor(
@@ -194,7 +194,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertEqual(processor.image_processor.to_json_string(), image_processor_add_kwargs.to_json_string())
self.assertIsInstance(processor.image_processor, GroundingDinoImageProcessor)
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_image_processor with CLIP->GroundingDino
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_image_processor with CLIP->GroundingDino
def test_image_processor(self):
image_processor = self.get_image_processor()
tokenizer = self.get_tokenizer()
@@ -209,7 +209,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
for key in input_image_proc:
self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2)
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_tokenizer with CLIP->GroundingDino
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_tokenizer with CLIP->GroundingDino
def test_tokenizer(self):
image_processor = self.get_image_processor()
tokenizer = self.get_tokenizer()
@@ -244,7 +244,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
with pytest.raises(ValueError):
processor()
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_tokenizer_decode with CLIP->GroundingDino
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_tokenizer_decode with CLIP->GroundingDino
def test_tokenizer_decode(self):
image_processor = self.get_image_processor()
tokenizer = self.get_tokenizer()
@@ -258,7 +258,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertListEqual(decoded_tok, decoded_processor)
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_model_input_names with CLIP->GroundingDino
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_model_input_names with CLIP->GroundingDino
def test_model_input_names(self):
image_processor = self.get_image_processor()
tokenizer = self.get_tokenizer()

View File

@@ -84,7 +84,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def prepare_processor_dict():
return {"image_seq_len": 2}
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"
@@ -284,7 +284,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertEqual(np.array(inputs["pixel_values"]).shape, (2, 2, 3, 364, 364))
self.assertEqual(np.array(inputs["pixel_attention_mask"]).shape, (2, 2, 364, 364))
# Copied from tests.models.idefics2.test_processor_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
# Copied from tests.models.idefics2.test_processing_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
def test_process_interleaved_images_prompts_image_error(self):
processor = self.get_processor()

View File

@@ -97,7 +97,7 @@ class InternVLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -66,7 +66,7 @@ class LlavaNextProcessorTest(ProcessorTesterMixin, unittest.TestCase):
"vision_feature_select_strategy": "default"
} # fmt: skip
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"
@@ -79,7 +79,7 @@ class LlavaNextProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertTrue("num_image_patches" in output)
self.assertEqual(len(output["num_image_patches"]), 3)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
def test_chat_template_is_saved(self):
processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
processor_dict_loaded = json.loads(processor_loaded.to_json_string())

View File

@@ -75,7 +75,7 @@ class LlavaNextVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
"vision_feature_select_strategy": "default",
}
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"
@@ -88,7 +88,7 @@ class LlavaNextVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertTrue("num_image_patches" in output)
self.assertEqual(len(output["num_image_patches"]), 3)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
def test_chat_template_is_saved(self):
processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
processor_dict_loaded = json.loads(processor_loaded.to_json_string())

View File

@@ -79,7 +79,7 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
"vision_feature_select_strategy": "default"
} # fmt: skip
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"
@@ -92,7 +92,7 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertTrue("num_image_patches" in output)
self.assertEqual(len(output["num_image_patches"]), 3)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
def test_chat_template_is_saved(self):
processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
processor_dict_loaded = json.loads(processor_loaded.to_json_string())

View File

@@ -50,7 +50,7 @@ def floats_list(shape, scale=1.0, rng=None, name=None):
@require_torch
@require_sentencepiece
@require_torchaudio
# Copied from tests.models.musicgen.test_processor_musicgen.MusicgenProcessorTest with Musicgen->MusicgenMelody, Encodec->MusicgenMelody, padding_mask->attention_mask, input_values->input_features
# Copied from tests.models.musicgen.test_processing_musicgen.MusicgenProcessorTest with Musicgen->MusicgenMelody, Encodec->MusicgenMelody, padding_mask->attention_mask, input_values->input_features
class MusicgenMelodyProcessorTest(unittest.TestCase):
def setUp(self):
# Ignore copy

View File

@@ -48,7 +48,7 @@ class PaliGemmaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -65,7 +65,7 @@ class Qwen2_5_VLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -68,7 +68,7 @@ class Qwen2VLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -76,7 +76,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
tokenizer_instance = isinstance(processor.tokenizer, (SeamlessM4TTokenizerFast, SeamlessM4TTokenizer))
self.assertTrue(tokenizer_instance)
# Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->SeamlessM4T
# Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->SeamlessM4T
def test_feature_extractor(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
@@ -91,7 +91,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
for key in input_feat_extract:
self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)
# Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->SeamlessM4T
# Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer with Whisper->SeamlessM4T
def test_tokenizer(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
@@ -107,7 +107,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
for key in encoded_tok:
self.assertListEqual(encoded_tok[key], encoded_processor[key])
# Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->SeamlessM4T
# Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->SeamlessM4T
def test_tokenizer_decode(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()

View File

@@ -297,7 +297,7 @@ class SmolVLMProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertEqual(np.array(inputs["pixel_values"]).shape, (2, 2, 3, 512, 512))
self.assertEqual(np.array(inputs["pixel_attention_mask"]).shape, (2, 2, 512, 512))
# Copied from tests.models.idefics2.test_processor_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
# Copied from tests.models.idefics2.test_processing_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
def test_process_interleaved_images_prompts_image_error(self):
processor = self.get_processor()

File diff suppressed because it is too large. Load Diff