Rework add-new-model-like with modular and make test filenames coherent (#39612)

* remove tf/flax

* fix

* style

* Update add_new_model_like.py

* work in progress

* continue

* more cleanup

* simplify and first final version

* fixes -> it works

* add linter checks

* Update add_new_model_like.py

* fix

* add modular conversion at the end

* Update add_new_model_like.py

* add video processor

* Update add_new_model_like.py

* Update add_new_model_like.py

* Update add_new_model_like.py

* fix

* Update image_processing_auto.py

* Update image_processing_auto.py

* fix post rebase

* start test filenames replacement

* rename all test_processor -> test_processing

* fix copied from

* add docstrings

* Update add_new_model_like.py

* fix regex

* improve wording

* Update add_new_model_like.py

* Update add_new_model_like.py

* Update add_new_model_like.py

* start adding test

* fix

* fix

* proper first test

* tests

* fix

* fix

* fix

* fix

* modular can be used from anywhere

* protect import

* fix

* Update add_new_model_like.py

* fix
This commit is contained in:
Cyril Vallez
2025-08-04 14:41:09 +02:00
committed by GitHub
parent 5fb5b6cfaf
commit 380b2a0317
87 changed files with 1438 additions and 2869 deletions

View File

@@ -29,7 +29,6 @@ from transformers.testing_utils import HfDoctestModule, HfDocTestParser
NOT_DEVICE_TESTS = {
"test_tokenization",
"test_tokenization_mistral_common",
"test_processor",
"test_processing",
"test_beam_constraints",
"test_configuration_utils",

File diff suppressed because it is too large Load Diff

View File

@@ -62,7 +62,7 @@ else:
("aimv2", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
("aimv2_vision_model", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
("align", ("EfficientNetImageProcessor", "EfficientNetImageProcessorFast")),
("aria", ("AriaImageProcessor")),
("aria", ("AriaImageProcessor", None)),
("beit", ("BeitImageProcessor", "BeitImageProcessorFast")),
("bit", ("BitImageProcessor", "BitImageProcessorFast")),
("blip", ("BlipImageProcessor", "BlipImageProcessorFast")),
@@ -72,7 +72,7 @@ else:
("chinese_clip", ("ChineseCLIPImageProcessor", "ChineseCLIPImageProcessorFast")),
("clip", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
("clipseg", ("ViTImageProcessor", "ViTImageProcessorFast")),
("cohere2_vision", ("Cohere2VisionImageProcessorFast",)),
("cohere2_vision", (None, "Cohere2VisionImageProcessorFast")),
("conditional_detr", ("ConditionalDetrImageProcessor", "ConditionalDetrImageProcessorFast")),
("convnext", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
("convnextv2", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
@@ -84,52 +84,52 @@ else:
("deit", ("DeiTImageProcessor", "DeiTImageProcessorFast")),
("depth_anything", ("DPTImageProcessor", "DPTImageProcessorFast")),
("depth_pro", ("DepthProImageProcessor", "DepthProImageProcessorFast")),
("deta", ("DetaImageProcessor",)),
("deta", ("DetaImageProcessor", None)),
("detr", ("DetrImageProcessor", "DetrImageProcessorFast")),
("dinat", ("ViTImageProcessor", "ViTImageProcessorFast")),
("dinov2", ("BitImageProcessor", "BitImageProcessorFast")),
("donut-swin", ("DonutImageProcessor", "DonutImageProcessorFast")),
("dpt", ("DPTImageProcessor", "DPTImageProcessorFast")),
("efficientformer", ("EfficientFormerImageProcessor",)),
("efficientloftr", ("EfficientLoFTRImageProcessor",)),
("efficientformer", ("EfficientFormerImageProcessor", None)),
("efficientloftr", ("EfficientLoFTRImageProcessor", None)),
("efficientnet", ("EfficientNetImageProcessor", "EfficientNetImageProcessorFast")),
("eomt", ("EomtImageProcessor", "EomtImageProcessorFast")),
("flava", ("FlavaImageProcessor", "FlavaImageProcessorFast")),
("focalnet", ("BitImageProcessor", "BitImageProcessorFast")),
("fuyu", ("FuyuImageProcessor",)),
("fuyu", ("FuyuImageProcessor", None)),
("gemma3", ("Gemma3ImageProcessor", "Gemma3ImageProcessorFast")),
("gemma3n", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
("git", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
("glm4v", ("Glm4vImageProcessor", "Glm4vImageProcessorFast")),
("glpn", ("GLPNImageProcessor",)),
("glpn", ("GLPNImageProcessor", None)),
("got_ocr2", ("GotOcr2ImageProcessor", "GotOcr2ImageProcessorFast")),
("grounding-dino", ("GroundingDinoImageProcessor", "GroundingDinoImageProcessorFast")),
("groupvit", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
("hiera", ("BitImageProcessor", "BitImageProcessorFast")),
("idefics", ("IdeficsImageProcessor",)),
("idefics", ("IdeficsImageProcessor", None)),
("idefics2", ("Idefics2ImageProcessor", "Idefics2ImageProcessorFast")),
("idefics3", ("Idefics3ImageProcessor", "Idefics3ImageProcessorFast")),
("ijepa", ("ViTImageProcessor", "ViTImageProcessorFast")),
("imagegpt", ("ImageGPTImageProcessor",)),
("imagegpt", ("ImageGPTImageProcessor", None)),
("instructblip", ("BlipImageProcessor", "BlipImageProcessorFast")),
("instructblipvideo", ("InstructBlipVideoImageProcessor",)),
("instructblipvideo", ("InstructBlipVideoImageProcessor", None)),
("janus", ("JanusImageProcessor", "JanusImageProcessorFast")),
("kosmos-2", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
("layoutlmv2", ("LayoutLMv2ImageProcessor", "LayoutLMv2ImageProcessorFast")),
("layoutlmv3", ("LayoutLMv3ImageProcessor", "LayoutLMv3ImageProcessorFast")),
("levit", ("LevitImageProcessor", "LevitImageProcessorFast")),
("lightglue", ("LightGlueImageProcessor",)),
("lightglue", ("LightGlueImageProcessor", None)),
("llama4", ("Llama4ImageProcessor", "Llama4ImageProcessorFast")),
("llava", ("LlavaImageProcessor", "LlavaImageProcessorFast")),
("llava_next", ("LlavaNextImageProcessor", "LlavaNextImageProcessorFast")),
("llava_next_video", ("LlavaNextVideoImageProcessor",)),
("llava_next_video", ("LlavaNextVideoImageProcessor", None)),
("llava_onevision", ("LlavaOnevisionImageProcessor", "LlavaOnevisionImageProcessorFast")),
("mask2former", ("Mask2FormerImageProcessor", "Mask2FormerImageProcessorFast")),
("maskformer", ("MaskFormerImageProcessor", "MaskFormerImageProcessorFast")),
("mgp-str", ("ViTImageProcessor", "ViTImageProcessorFast")),
("mistral3", ("PixtralImageProcessor", "PixtralImageProcessorFast")),
("mlcd", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
("mllama", ("MllamaImageProcessor",)),
("mllama", ("MllamaImageProcessor", None)),
("mm-grounding-dino", ("GroundingDinoImageProcessor", "GroundingDinoImageProcessorFast")),
("mobilenet_v1", ("MobileNetV1ImageProcessor", "MobileNetV1ImageProcessorFast")),
("mobilenet_v2", ("MobileNetV2ImageProcessor", "MobileNetV2ImageProcessorFast")),
@@ -142,12 +142,12 @@ else:
("owlvit", ("OwlViTImageProcessor", "OwlViTImageProcessorFast")),
("paligemma", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
("perceiver", ("PerceiverImageProcessor", "PerceiverImageProcessorFast")),
("perception_lm", ("PerceptionLMImageProcessorFast",)),
("phi4_multimodal", ("Phi4MultimodalImageProcessorFast",)),
("pix2struct", ("Pix2StructImageProcessor",)),
("perception_lm", (None, "PerceptionLMImageProcessorFast")),
("phi4_multimodal", (None, "Phi4MultimodalImageProcessorFast")),
("pix2struct", ("Pix2StructImageProcessor", None)),
("pixtral", ("PixtralImageProcessor", "PixtralImageProcessorFast")),
("poolformer", ("PoolFormerImageProcessor", "PoolFormerImageProcessorFast")),
("prompt_depth_anything", ("PromptDepthAnythingImageProcessor",)),
("prompt_depth_anything", ("PromptDepthAnythingImageProcessor", None)),
("pvt", ("PvtImageProcessor", "PvtImageProcessorFast")),
("pvt_v2", ("PvtImageProcessor", "PvtImageProcessorFast")),
("qwen2_5_vl", ("Qwen2VLImageProcessor", "Qwen2VLImageProcessorFast")),
@@ -157,39 +157,31 @@ else:
("rt_detr", ("RTDetrImageProcessor", "RTDetrImageProcessorFast")),
("sam", ("SamImageProcessor", "SamImageProcessorFast")),
("sam_hq", ("SamImageProcessor", "SamImageProcessorFast")),
("segformer", ("SegformerImageProcessor",)),
("segformer", ("SegformerImageProcessor", "SegformerImageProcessorFast")),
("seggpt", ("SegGptImageProcessor",)),
("seggpt", ("SegGptImageProcessor", None)),
("shieldgemma2", ("Gemma3ImageProcessor", "Gemma3ImageProcessorFast")),
("siglip", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
("siglip2", ("Siglip2ImageProcessor", "Siglip2ImageProcessorFast")),
("smolvlm", ("SmolVLMImageProcessor", "SmolVLMImageProcessorFast")),
("superglue", ("SuperGlueImageProcessor",)),
(
"superpoint",
(
"SuperPointImageProcessor",
"SuperPointImageProcessorFast",
),
),
("superglue", ("SuperGlueImageProcessor", None)),
("superpoint", ("SuperPointImageProcessor", "SuperPointImageProcessorFast")),
("swiftformer", ("ViTImageProcessor", "ViTImageProcessorFast")),
("swin", ("ViTImageProcessor", "ViTImageProcessorFast")),
("swin2sr", ("Swin2SRImageProcessor", "Swin2SRImageProcessorFast")),
("swinv2", ("ViTImageProcessor", "ViTImageProcessorFast")),
("table-transformer", ("DetrImageProcessor",)),
("timesformer", ("VideoMAEImageProcessor",)),
("timm_wrapper", ("TimmWrapperImageProcessor",)),
("tvlt", ("TvltImageProcessor",)),
("tvp", ("TvpImageProcessor",)),
("table-transformer", ("DetrImageProcessor", None)),
("timesformer", ("VideoMAEImageProcessor", None)),
("timm_wrapper", ("TimmWrapperImageProcessor", None)),
("tvlt", ("TvltImageProcessor", None)),
("tvp", ("TvpImageProcessor", None)),
("udop", ("LayoutLMv3ImageProcessor", "LayoutLMv3ImageProcessorFast")),
("udop", ("LayoutLMv3ImageProcessor",)),
("upernet", ("SegformerImageProcessor", "SegformerImageProcessorFast")),
("van", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
("videomae", ("VideoMAEImageProcessor",)),
("videomae", ("VideoMAEImageProcessor", None)),
("vilt", ("ViltImageProcessor", "ViltImageProcessorFast")),
("vipllava", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
("vit", ("ViTImageProcessor", "ViTImageProcessorFast")),
("vit_hybrid", ("ViTHybridImageProcessor",)),
("vit_hybrid", ("ViTHybridImageProcessor", None)),
("vit_mae", ("ViTImageProcessor", "ViTImageProcessorFast")),
("vit_msn", ("ViTImageProcessor", "ViTImageProcessorFast")),
("vitmatte", ("VitMatteImageProcessor", "VitMatteImageProcessorFast")),
@@ -199,18 +191,14 @@ else:
]
)
for model_type, image_processors in IMAGE_PROCESSOR_MAPPING_NAMES.items():
slow_image_processor_class, *fast_image_processor_class = image_processors
# Override to None if the packages are not available
for model_type, (slow_class, fast_class) in IMAGE_PROCESSOR_MAPPING_NAMES.items():
if not is_vision_available():
slow_image_processor_class = None
slow_class = None
if not is_torchvision_available():
fast_class = None
# If the fast image processor is not defined, or torchvision is not available, we set it to None
if not fast_image_processor_class or fast_image_processor_class[0] is None or not is_torchvision_available():
fast_image_processor_class = None
else:
fast_image_processor_class = fast_image_processor_class[0]
IMAGE_PROCESSOR_MAPPING_NAMES[model_type] = (slow_image_processor_class, fast_image_processor_class)
IMAGE_PROCESSOR_MAPPING_NAMES[model_type] = (slow_class, fast_class)
IMAGE_PROCESSOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, IMAGE_PROCESSOR_MAPPING_NAMES)

View File

@@ -179,6 +179,7 @@ from .import_utils import (
is_keras_nlp_available,
is_kernels_available,
is_levenshtein_available,
is_libcst_available,
is_librosa_available,
is_liger_kernel_available,
is_lomo_available,

View File

@@ -120,6 +120,7 @@ _vptq_available, _vptq_version = _is_package_available("vptq", return_version=Tr
_av_available = importlib.util.find_spec("av") is not None
_decord_available = importlib.util.find_spec("decord") is not None
_torchcodec_available = importlib.util.find_spec("torchcodec") is not None
_libcst_available = _is_package_available("libcst")
_bitsandbytes_available = _is_package_available("bitsandbytes")
_eetq_available = _is_package_available("eetq")
_fbgemm_gpu_available = _is_package_available("fbgemm_gpu")
@@ -379,6 +380,10 @@ def is_torch_available():
return _torch_available
def is_libcst_available():
return _libcst_available
def is_accelerate_available(min_version: str = ACCELERATE_MIN_VERSION):
return _accelerate_available and version.parse(_accelerate_version) >= version.parse(min_version)

View File

@@ -95,7 +95,7 @@ class AriaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -80,7 +80,7 @@ class AyaVisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -75,7 +75,7 @@ class ChameleonProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def prepare_processor_dict():
return {"image_seq_length": 2} # fmt: skip
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -35,15 +35,15 @@ class ClvpProcessorTest(unittest.TestCase):
shutil.rmtree(self.tmpdirname)
gc.collect()
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.get_tokenizer with Whisper->Clvp
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_tokenizer with Whisper->Clvp
def get_tokenizer(self, **kwargs):
return ClvpTokenizer.from_pretrained(self.checkpoint, **kwargs)
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.get_feature_extractor with Whisper->Clvp
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_feature_extractor with Whisper->Clvp
def get_feature_extractor(self, **kwargs):
return ClvpFeatureExtractor.from_pretrained(self.checkpoint, **kwargs)
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_save_load_pretrained_default with Whisper->Clvp
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_save_load_pretrained_default with Whisper->Clvp
def test_save_load_pretrained_default(self):
tokenizer = self.get_tokenizer()
feature_extractor = self.get_feature_extractor()
@@ -59,7 +59,7 @@ class ClvpProcessorTest(unittest.TestCase):
self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor.to_json_string())
self.assertIsInstance(processor.feature_extractor, ClvpFeatureExtractor)
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->Clvp,processor(raw_speech->processor(raw_speech=raw_speech
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->Clvp,processor(raw_speech->processor(raw_speech=raw_speech
def test_feature_extractor(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
@@ -74,7 +74,7 @@ class ClvpProcessorTest(unittest.TestCase):
for key in input_feat_extract:
self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp
def test_tokenizer(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
@@ -90,7 +90,7 @@ class ClvpProcessorTest(unittest.TestCase):
for key in encoded_tok:
self.assertListEqual(encoded_tok[key], encoded_processor[key])
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp
def test_tokenizer_decode(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()

View File

@@ -54,7 +54,7 @@ class ColPaliProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -57,7 +57,7 @@ class ColQwen2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -91,7 +91,7 @@ class Emu3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
# For an image where pixels go from 0 to 255 the diff can be 1 due to some numerical precision errors when scaling and unscaling
self.assertTrue(np.abs(orig_image - unnormalized_images).max() >= 1)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -64,7 +64,7 @@ class FuyuProcessingTest(ProcessorTesterMixin, unittest.TestCase):
def get_image_processor(self, **kwargs):
return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).image_processor
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -58,7 +58,7 @@ class Gemma3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor.save_pretrained(cls.tmpdirname)
cls.image_token = processor.boi_token
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -94,17 +94,17 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
return [labels, labels_longer] + [labels] * (batch_size - 2)
@classmethod
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_tokenizer with CLIP->Bert
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_tokenizer with CLIP->Bert
def get_tokenizer(cls, **kwargs):
return BertTokenizer.from_pretrained(cls.tmpdirname, **kwargs)
@classmethod
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_rust_tokenizer with CLIP->Bert
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_rust_tokenizer with CLIP->Bert
def get_rust_tokenizer(cls, **kwargs):
return BertTokenizerFast.from_pretrained(cls.tmpdirname, **kwargs)
@classmethod
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_image_processor with CLIP->GroundingDino
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_image_processor with CLIP->GroundingDino
def get_image_processor(cls, **kwargs):
return GroundingDinoImageProcessor.from_pretrained(cls.tmpdirname, **kwargs)
@@ -145,7 +145,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
expected_box_slice = torch.tensor([0.6908, 0.4354, 1.0737, 1.3947])
torch.testing.assert_close(post_processed[0]["boxes"][0], expected_box_slice, rtol=1e-4, atol=1e-4)
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_save_load_pretrained_default with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_save_load_pretrained_default with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
def test_save_load_pretrained_default(self):
tokenizer_slow = self.get_tokenizer()
tokenizer_fast = self.get_rust_tokenizer()
@@ -171,7 +171,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertIsInstance(processor_slow.image_processor, GroundingDinoImageProcessor)
self.assertIsInstance(processor_fast.image_processor, GroundingDinoImageProcessor)
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_save_load_pretrained_additional_features with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_save_load_pretrained_additional_features with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
def test_save_load_pretrained_additional_features(self):
with tempfile.TemporaryDirectory() as tmpdir:
processor = GroundingDinoProcessor(
@@ -194,7 +194,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertEqual(processor.image_processor.to_json_string(), image_processor_add_kwargs.to_json_string())
self.assertIsInstance(processor.image_processor, GroundingDinoImageProcessor)
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_image_processor with CLIP->GroundingDino
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_image_processor with CLIP->GroundingDino
def test_image_processor(self):
image_processor = self.get_image_processor()
tokenizer = self.get_tokenizer()
@@ -209,7 +209,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
for key in input_image_proc:
self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2)
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_tokenizer with CLIP->GroundingDino
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_tokenizer with CLIP->GroundingDino
def test_tokenizer(self):
image_processor = self.get_image_processor()
tokenizer = self.get_tokenizer()
@@ -244,7 +244,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
with pytest.raises(ValueError):
processor()
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_tokenizer_decode with CLIP->GroundingDino
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_tokenizer_decode with CLIP->GroundingDino
def test_tokenizer_decode(self):
image_processor = self.get_image_processor()
tokenizer = self.get_tokenizer()
@@ -258,7 +258,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertListEqual(decoded_tok, decoded_processor)
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_model_input_names with CLIP->GroundingDino
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_model_input_names with CLIP->GroundingDino
def test_model_input_names(self):
image_processor = self.get_image_processor()
tokenizer = self.get_tokenizer()

View File

@@ -84,7 +84,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def prepare_processor_dict():
return {"image_seq_len": 2}
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"
@@ -284,7 +284,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertEqual(np.array(inputs["pixel_values"]).shape, (2, 2, 3, 364, 364))
self.assertEqual(np.array(inputs["pixel_attention_mask"]).shape, (2, 2, 364, 364))
# Copied from tests.models.idefics2.test_processor_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
# Copied from tests.models.idefics2.test_processing_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
def test_process_interleaved_images_prompts_image_error(self):
processor = self.get_processor()

View File

@@ -97,7 +97,7 @@ class InternVLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -66,7 +66,7 @@ class LlavaNextProcessorTest(ProcessorTesterMixin, unittest.TestCase):
"vision_feature_select_strategy": "default"
} # fmt: skip
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"
@@ -79,7 +79,7 @@ class LlavaNextProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertTrue("num_image_patches" in output)
self.assertEqual(len(output["num_image_patches"]), 3)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
def test_chat_template_is_saved(self):
processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
processor_dict_loaded = json.loads(processor_loaded.to_json_string())

View File

@@ -75,7 +75,7 @@ class LlavaNextVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
"vision_feature_select_strategy": "default",
}
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"
@@ -88,7 +88,7 @@ class LlavaNextVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertTrue("num_image_patches" in output)
self.assertEqual(len(output["num_image_patches"]), 3)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
def test_chat_template_is_saved(self):
processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
processor_dict_loaded = json.loads(processor_loaded.to_json_string())

View File

@@ -79,7 +79,7 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
"vision_feature_select_strategy": "default"
} # fmt: skip
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"
@@ -92,7 +92,7 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertTrue("num_image_patches" in output)
self.assertEqual(len(output["num_image_patches"]), 3)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
def test_chat_template_is_saved(self):
processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
processor_dict_loaded = json.loads(processor_loaded.to_json_string())

View File

@@ -50,7 +50,7 @@ def floats_list(shape, scale=1.0, rng=None, name=None):
@require_torch
@require_sentencepiece
@require_torchaudio
# Copied from tests.models.musicgen.test_processor_musicgen.MusicgenProcessorTest with Musicgen->MusicgenMelody, Encodec->MusicgenMelody, padding_mask->attention_mask, input_values->input_features
# Copied from tests.models.musicgen.test_processing_musicgen.MusicgenProcessorTest with Musicgen->MusicgenMelody, Encodec->MusicgenMelody, padding_mask->attention_mask, input_values->input_features
class MusicgenMelodyProcessorTest(unittest.TestCase):
def setUp(self):
# Ignore copy

View File

@@ -48,7 +48,7 @@ class PaliGemmaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -65,7 +65,7 @@ class Qwen2_5_VLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -68,7 +68,7 @@ class Qwen2VLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
def tearDownClass(cls):
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
def test_get_num_vision_tokens(self):
"Tests general functionality of the helper used internally in vLLM"

View File

@@ -76,7 +76,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
tokenizer_instance = isinstance(processor.tokenizer, (SeamlessM4TTokenizerFast, SeamlessM4TTokenizer))
self.assertTrue(tokenizer_instance)
# Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->SeamlessM4T
# Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->SeamlessM4T
def test_feature_extractor(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
@@ -91,7 +91,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
for key in input_feat_extract:
self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)
# Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->SeamlessM4T
# Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer with Whisper->SeamlessM4T
def test_tokenizer(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()
@@ -107,7 +107,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
for key in encoded_tok:
self.assertListEqual(encoded_tok[key], encoded_processor[key])
# Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->SeamlessM4T
# Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->SeamlessM4T
def test_tokenizer_decode(self):
feature_extractor = self.get_feature_extractor()
tokenizer = self.get_tokenizer()

View File

@@ -297,7 +297,7 @@ class SmolVLMProcessorTest(ProcessorTesterMixin, unittest.TestCase):
self.assertEqual(np.array(inputs["pixel_values"]).shape, (2, 2, 3, 512, 512))
self.assertEqual(np.array(inputs["pixel_attention_mask"]).shape, (2, 2, 512, 512))
# Copied from tests.models.idefics2.test_processor_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
# Copied from tests.models.idefics2.test_processing_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
def test_process_interleaved_images_prompts_image_error(self):
processor = self.get_processor()

File diff suppressed because it is too large Load Diff

View File

@@ -17,12 +17,12 @@ import glob
import importlib
import os
import re
import subprocess
from abc import ABC, abstractmethod
from collections import Counter, defaultdict, deque
from typing import Optional, Union
import libcst as cst
from check_copies import run_ruff
from create_dependency_mapping import find_priority_list
from libcst import ClassDef, CSTVisitor
from libcst import matchers as m
@@ -1676,6 +1676,16 @@ def create_modules(modular_mapper: ModularFileMapper) -> dict[str, cst.Module]:
return files
def run_ruff(code, check=False):
if check:
command = ["ruff", "check", "-", "--fix", "--exit-zero"]
else:
command = ["ruff", "format", "-", "--config", "pyproject.toml", "--silent"]
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, _ = process.communicate(input=code.encode())
return stdout.decode()
def convert_modular_file(modular_file):
pattern = re.search(r"modular_(.*)(?=\.py$)", modular_file)
output = {}