Rework add-new-model-like with modular and make test filenames coherent (#39612)

* remove tf/flax
* fix
* style
* Update add_new_model_like.py
* work in progress
* continue
* more cleanup
* simplify and first final version
* fixes -> it works
* add linter checks
* Update add_new_model_like.py
* fix
* add modular conversion at the end
* Update add_new_model_like.py
* add video processor
* Update add_new_model_like.py
* Update add_new_model_like.py
* Update add_new_model_like.py
* fix
* Update image_processing_auto.py
* Update image_processing_auto.py
* fix post rebase
* start test filenames replacement
* rename all test_processor -> test_processing
* fix copied from
* add docstrings
* Update add_new_model_like.py
* fix regex
* improve wording
* Update add_new_model_like.py
* Update add_new_model_like.py
* Update add_new_model_like.py
* start adding test
* fix
* fix
* proper first test
* tests
* fix
* fix
* fix
* fix
* modular can be used from anywhere
* protect import
* fix
* Update add_new_model_like.py
* fix
2025-08-04 14:41:09 +02:00
parent 5fb5b6cfaf
commit 380b2a0317
87 changed files with 1438 additions and 2869 deletions
--- a/conftest.py
+++ b/conftest.py
@@ -29,7 +29,6 @@ from transformers.testing_utils import HfDoctestModule, HfDocTestParser
 NOT_DEVICE_TESTS = {
    "test_tokenization",
    "test_tokenization_mistral_common",
-    "test_processor",
    "test_processing",
    "test_beam_constraints",
    "test_configuration_utils",
--- a/src/transformers/commands/add_new_model_like.py
+++ b/src/transformers/commands/add_new_model_like.py
--- a/src/transformers/models/auto/image_processing_auto.py
+++ b/src/transformers/models/auto/image_processing_auto.py
@@ -62,7 +62,7 @@ else:
            ("aimv2", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("aimv2_vision_model", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("align", ("EfficientNetImageProcessor", "EfficientNetImageProcessorFast")),
-            ("aria", ("AriaImageProcessor")),
+            ("aria", ("AriaImageProcessor", None)),
            ("beit", ("BeitImageProcessor", "BeitImageProcessorFast")),
            ("bit", ("BitImageProcessor", "BitImageProcessorFast")),
            ("blip", ("BlipImageProcessor", "BlipImageProcessorFast")),
@@ -72,7 +72,7 @@ else:
            ("chinese_clip", ("ChineseCLIPImageProcessor", "ChineseCLIPImageProcessorFast")),
            ("clip", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("clipseg", ("ViTImageProcessor", "ViTImageProcessorFast")),
-            ("cohere2_vision", ("Cohere2VisionImageProcessorFast",)),
+            ("cohere2_vision", (None, "Cohere2VisionImageProcessorFast")),
            ("conditional_detr", ("ConditionalDetrImageProcessor", "ConditionalDetrImageProcessorFast")),
            ("convnext", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
            ("convnextv2", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
@@ -84,52 +84,52 @@ else:
            ("deit", ("DeiTImageProcessor", "DeiTImageProcessorFast")),
            ("depth_anything", ("DPTImageProcessor", "DPTImageProcessorFast")),
            ("depth_pro", ("DepthProImageProcessor", "DepthProImageProcessorFast")),
-            ("deta", ("DetaImageProcessor",)),
+            ("deta", ("DetaImageProcessor", None)),
            ("detr", ("DetrImageProcessor", "DetrImageProcessorFast")),
            ("dinat", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("dinov2", ("BitImageProcessor", "BitImageProcessorFast")),
            ("donut-swin", ("DonutImageProcessor", "DonutImageProcessorFast")),
            ("dpt", ("DPTImageProcessor", "DPTImageProcessorFast")),
-            ("efficientformer", ("EfficientFormerImageProcessor",)),
+            ("efficientformer", ("EfficientFormerImageProcessor", None)),
-            ("efficientloftr", ("EfficientLoFTRImageProcessor",)),
+            ("efficientloftr", ("EfficientLoFTRImageProcessor", None)),
            ("efficientnet", ("EfficientNetImageProcessor", "EfficientNetImageProcessorFast")),
            ("eomt", ("EomtImageProcessor", "EomtImageProcessorFast")),
            ("flava", ("FlavaImageProcessor", "FlavaImageProcessorFast")),
            ("focalnet", ("BitImageProcessor", "BitImageProcessorFast")),
-            ("fuyu", ("FuyuImageProcessor",)),
+            ("fuyu", ("FuyuImageProcessor", None)),
            ("gemma3", ("Gemma3ImageProcessor", "Gemma3ImageProcessorFast")),
            ("gemma3n", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
            ("git", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("glm4v", ("Glm4vImageProcessor", "Glm4vImageProcessorFast")),
-            ("glpn", ("GLPNImageProcessor",)),
+            ("glpn", ("GLPNImageProcessor", None)),
            ("got_ocr2", ("GotOcr2ImageProcessor", "GotOcr2ImageProcessorFast")),
            ("grounding-dino", ("GroundingDinoImageProcessor", "GroundingDinoImageProcessorFast")),
            ("groupvit", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("hiera", ("BitImageProcessor", "BitImageProcessorFast")),
-            ("idefics", ("IdeficsImageProcessor",)),
+            ("idefics", ("IdeficsImageProcessor", None)),
            ("idefics2", ("Idefics2ImageProcessor", "Idefics2ImageProcessorFast")),
            ("idefics3", ("Idefics3ImageProcessor", "Idefics3ImageProcessorFast")),
            ("ijepa", ("ViTImageProcessor", "ViTImageProcessorFast")),
-            ("imagegpt", ("ImageGPTImageProcessor",)),
+            ("imagegpt", ("ImageGPTImageProcessor", None)),
            ("instructblip", ("BlipImageProcessor", "BlipImageProcessorFast")),
-            ("instructblipvideo", ("InstructBlipVideoImageProcessor",)),
+            ("instructblipvideo", ("InstructBlipVideoImageProcessor", None)),
            ("janus", ("JanusImageProcessor", "JanusImageProcessorFast")),
            ("kosmos-2", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("layoutlmv2", ("LayoutLMv2ImageProcessor", "LayoutLMv2ImageProcessorFast")),
            ("layoutlmv3", ("LayoutLMv3ImageProcessor", "LayoutLMv3ImageProcessorFast")),
            ("levit", ("LevitImageProcessor", "LevitImageProcessorFast")),
-            ("lightglue", ("LightGlueImageProcessor",)),
+            ("lightglue", ("LightGlueImageProcessor", None)),
            ("llama4", ("Llama4ImageProcessor", "Llama4ImageProcessorFast")),
            ("llava", ("LlavaImageProcessor", "LlavaImageProcessorFast")),
            ("llava_next", ("LlavaNextImageProcessor", "LlavaNextImageProcessorFast")),
-            ("llava_next_video", ("LlavaNextVideoImageProcessor",)),
+            ("llava_next_video", ("LlavaNextVideoImageProcessor", None)),
            ("llava_onevision", ("LlavaOnevisionImageProcessor", "LlavaOnevisionImageProcessorFast")),
            ("mask2former", ("Mask2FormerImageProcessor", "Mask2FormerImageProcessorFast")),
            ("maskformer", ("MaskFormerImageProcessor", "MaskFormerImageProcessorFast")),
            ("mgp-str", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("mistral3", ("PixtralImageProcessor", "PixtralImageProcessorFast")),
            ("mlcd", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
-            ("mllama", ("MllamaImageProcessor",)),
+            ("mllama", ("MllamaImageProcessor", None)),
            ("mm-grounding-dino", ("GroundingDinoImageProcessor", "GroundingDinoImageProcessorFast")),
            ("mobilenet_v1", ("MobileNetV1ImageProcessor", "MobileNetV1ImageProcessorFast")),
            ("mobilenet_v2", ("MobileNetV2ImageProcessor", "MobileNetV2ImageProcessorFast")),
@@ -142,12 +142,12 @@ else:
            ("owlvit", ("OwlViTImageProcessor", "OwlViTImageProcessorFast")),
            ("paligemma", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
            ("perceiver", ("PerceiverImageProcessor", "PerceiverImageProcessorFast")),
-            ("perception_lm", ("PerceptionLMImageProcessorFast",)),
+            ("perception_lm", (None, "PerceptionLMImageProcessorFast")),
-            ("phi4_multimodal", ("Phi4MultimodalImageProcessorFast",)),
+            ("phi4_multimodal", (None, "Phi4MultimodalImageProcessorFast")),
-            ("pix2struct", ("Pix2StructImageProcessor",)),
+            ("pix2struct", ("Pix2StructImageProcessor", None)),
            ("pixtral", ("PixtralImageProcessor", "PixtralImageProcessorFast")),
            ("poolformer", ("PoolFormerImageProcessor", "PoolFormerImageProcessorFast")),
-            ("prompt_depth_anything", ("PromptDepthAnythingImageProcessor",)),
+            ("prompt_depth_anything", ("PromptDepthAnythingImageProcessor", None)),
            ("pvt", ("PvtImageProcessor", "PvtImageProcessorFast")),
            ("pvt_v2", ("PvtImageProcessor", "PvtImageProcessorFast")),
            ("qwen2_5_vl", ("Qwen2VLImageProcessor", "Qwen2VLImageProcessorFast")),
@@ -157,39 +157,31 @@ else:
            ("rt_detr", ("RTDetrImageProcessor", "RTDetrImageProcessorFast")),
            ("sam", ("SamImageProcessor", "SamImageProcessorFast")),
            ("sam_hq", ("SamImageProcessor", "SamImageProcessorFast")),
-            ("segformer", ("SegformerImageProcessor",)),
+            ("segformer", ("SegformerImageProcessor", "SegformerImageProcessorFast")),
-            ("seggpt", ("SegGptImageProcessor",)),
+            ("seggpt", ("SegGptImageProcessor", None)),
            ("shieldgemma2", ("Gemma3ImageProcessor", "Gemma3ImageProcessorFast")),
            ("siglip", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
            ("siglip2", ("Siglip2ImageProcessor", "Siglip2ImageProcessorFast")),
            ("smolvlm", ("SmolVLMImageProcessor", "SmolVLMImageProcessorFast")),
-            ("superglue", ("SuperGlueImageProcessor",)),
+            ("superglue", ("SuperGlueImageProcessor", None)),
-            (
-                "superpoint",
-                (
-                    "SuperPointImageProcessor",
-                    "SuperPointImageProcessorFast",
-                ),
-            ),
+            ("superpoint", ("SuperPointImageProcessor", "SuperPointImageProcessorFast")),
            ("swiftformer", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("swin", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("swin2sr", ("Swin2SRImageProcessor", "Swin2SRImageProcessorFast")),
            ("swinv2", ("ViTImageProcessor", "ViTImageProcessorFast")),
-            ("table-transformer", ("DetrImageProcessor",)),
+            ("table-transformer", ("DetrImageProcessor", None)),
-            ("timesformer", ("VideoMAEImageProcessor",)),
+            ("timesformer", ("VideoMAEImageProcessor", None)),
-            ("timm_wrapper", ("TimmWrapperImageProcessor",)),
+            ("timm_wrapper", ("TimmWrapperImageProcessor", None)),
-            ("tvlt", ("TvltImageProcessor",)),
+            ("tvlt", ("TvltImageProcessor", None)),
-            ("tvp", ("TvpImageProcessor",)),
+            ("tvp", ("TvpImageProcessor", None)),
-            ("udop", ("LayoutLMv3ImageProcessor",)),
+            ("udop", ("LayoutLMv3ImageProcessor", "LayoutLMv3ImageProcessorFast")),
            ("upernet", ("SegformerImageProcessor", "SegformerImageProcessorFast")),
            ("van", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
-            ("videomae", ("VideoMAEImageProcessor",)),
+            ("videomae", ("VideoMAEImageProcessor", None)),
            ("vilt", ("ViltImageProcessor", "ViltImageProcessorFast")),
            ("vipllava", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("vit", ("ViTImageProcessor", "ViTImageProcessorFast")),
-            ("vit_hybrid", ("ViTHybridImageProcessor",)),
+            ("vit_hybrid", ("ViTHybridImageProcessor", None)),
            ("vit_mae", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("vit_msn", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("vitmatte", ("VitMatteImageProcessor", "VitMatteImageProcessorFast")),
@@ -199,18 +191,14 @@ else:
        ]
    )
-for model_type, image_processors in IMAGE_PROCESSOR_MAPPING_NAMES.items():
-    slow_image_processor_class, *fast_image_processor_class = image_processors
+# Override to None if the packages are not available
+for model_type, (slow_class, fast_class) in IMAGE_PROCESSOR_MAPPING_NAMES.items():
    if not is_vision_available():
-        slow_image_processor_class = None
-    # If the fast image processor is not defined, or torchvision is not available, we set it to None
-    if not fast_image_processor_class or fast_image_processor_class[0] is None or not is_torchvision_available():
-        fast_image_processor_class = None
-    else:
-        fast_image_processor_class = fast_image_processor_class[0]
-    IMAGE_PROCESSOR_MAPPING_NAMES[model_type] = (slow_image_processor_class, fast_image_processor_class)
+        slow_class = None
+    if not is_torchvision_available():
+        fast_class = None
+    IMAGE_PROCESSOR_MAPPING_NAMES[model_type] = (slow_class, fast_class)
 IMAGE_PROCESSOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, IMAGE_PROCESSOR_MAPPING_NAMES)
--- a/src/transformers/utils/__init__.py
+++ b/src/transformers/utils/__init__.py
@@ -179,6 +179,7 @@ from .import_utils import (
    is_keras_nlp_available,
    is_kernels_available,
    is_levenshtein_available,
+    is_libcst_available,
    is_librosa_available,
    is_liger_kernel_available,
    is_lomo_available,
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -120,6 +120,7 @@ _vptq_available, _vptq_version = _is_package_available("vptq", return_version=Tr
 _av_available = importlib.util.find_spec("av") is not None
 _decord_available = importlib.util.find_spec("decord") is not None
 _torchcodec_available = importlib.util.find_spec("torchcodec") is not None
+_libcst_available = _is_package_available("libcst")
 _bitsandbytes_available = _is_package_available("bitsandbytes")
 _eetq_available = _is_package_available("eetq")
 _fbgemm_gpu_available = _is_package_available("fbgemm_gpu")
@@ -379,6 +380,10 @@ def is_torch_available():
    return _torch_available
+def is_libcst_available():
+    return _libcst_available
 def is_accelerate_available(min_version: str = ACCELERATE_MIN_VERSION):
    return _accelerate_available and version.parse(_accelerate_version) >= version.parse(min_version)
--- a/tests/models/align/test_processing_align.py
+++ b/tests/models/align/test_processing_align.py
--- a/tests/models/altclip/test_processing_altclip.py
+++ b/tests/models/altclip/test_processing_altclip.py
--- a/tests/models/aria/test_processing_aria.py
+++ b/tests/models/aria/test_processing_aria.py
@@ -95,7 +95,7 @@ class AriaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/aya_vision/test_processing_aya_vision.py
+++ b/tests/models/aya_vision/test_processing_aya_vision.py
@@ -80,7 +80,7 @@ class AyaVisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/bark/test_processing_bark.py
+++ b/tests/models/bark/test_processing_bark.py
--- a/tests/models/blip/test_processing_blip.py
+++ b/tests/models/blip/test_processing_blip.py
--- a/tests/models/blip_2/test_processing_blip_2.py
+++ b/tests/models/blip_2/test_processing_blip_2.py
--- a/tests/models/bridgetower/test_processing_bridgetower.py
+++ b/tests/models/bridgetower/test_processing_bridgetower.py
--- a/tests/models/chameleon/test_processing_chameleon.py
+++ b/tests/models/chameleon/test_processing_chameleon.py
@@ -75,7 +75,7 @@ class ChameleonProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def prepare_processor_dict():
        return {"image_seq_length": 2}  # fmt: skip
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/chinese_clip/test_processing_chinese_clip.py
+++ b/tests/models/chinese_clip/test_processing_chinese_clip.py
--- a/tests/models/clap/test_processing_clap.py
+++ b/tests/models/clap/test_processing_clap.py
--- a/tests/models/clip/test_processing_clip.py
+++ b/tests/models/clip/test_processing_clip.py
--- a/tests/models/clipseg/test_processing_clipseg.py
+++ b/tests/models/clipseg/test_processing_clipseg.py
--- a/tests/models/clvp/test_processing_clvp.py
+++ b/tests/models/clvp/test_processing_clvp.py
@@ -35,15 +35,15 @@ class ClvpProcessorTest(unittest.TestCase):
        shutil.rmtree(self.tmpdirname)
        gc.collect()
-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.get_tokenizer with Whisper->Clvp
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_tokenizer with Whisper->Clvp
    def get_tokenizer(self, **kwargs):
        return ClvpTokenizer.from_pretrained(self.checkpoint, **kwargs)
-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.get_feature_extractor with Whisper->Clvp
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_feature_extractor with Whisper->Clvp
    def get_feature_extractor(self, **kwargs):
        return ClvpFeatureExtractor.from_pretrained(self.checkpoint, **kwargs)
-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_save_load_pretrained_default with Whisper->Clvp
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_save_load_pretrained_default with Whisper->Clvp
    def test_save_load_pretrained_default(self):
        tokenizer = self.get_tokenizer()
        feature_extractor = self.get_feature_extractor()
@@ -59,7 +59,7 @@ class ClvpProcessorTest(unittest.TestCase):
        self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor.to_json_string())
        self.assertIsInstance(processor.feature_extractor, ClvpFeatureExtractor)
-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->Clvp,processor(raw_speech->processor(raw_speech=raw_speech
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->Clvp,processor(raw_speech->processor(raw_speech=raw_speech
    def test_feature_extractor(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
@@ -74,7 +74,7 @@ class ClvpProcessorTest(unittest.TestCase):
        for key in input_feat_extract:
            self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)
-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp
    def test_tokenizer(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
@@ -90,7 +90,7 @@ class ClvpProcessorTest(unittest.TestCase):
        for key in encoded_tok:
            self.assertListEqual(encoded_tok[key], encoded_processor[key])
-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp
    def test_tokenizer_decode(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
--- a/tests/models/colpali/test_processing_colpali.py
+++ b/tests/models/colpali/test_processing_colpali.py
@@ -54,7 +54,7 @@ class ColPaliProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/colqwen2/test_processing_colqwen2.py
+++ b/tests/models/colqwen2/test_processing_colqwen2.py
@@ -57,7 +57,7 @@ class ColQwen2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/csm/test_processing_csm.py
+++ b/tests/models/csm/test_processing_csm.py
--- a/tests/models/deepseek_vl/test_processing_deepseek_vl.py
+++ b/tests/models/deepseek_vl/test_processing_deepseek_vl.py
--- a/tests/models/deepseek_vl_hybrid/test_processing_deepseek_vl_hybrid.py
+++ b/tests/models/deepseek_vl_hybrid/test_processing_deepseek_vl_hybrid.py
--- a/tests/models/dia/test_processing_dia.py
+++ b/tests/models/dia/test_processing_dia.py
--- a/tests/models/donut/test_processing_donut.py
+++ b/tests/models/donut/test_processing_donut.py
--- a/tests/models/emu3/test_processing_emu3.py
+++ b/tests/models/emu3/test_processing_emu3.py
@@ -91,7 +91,7 @@ class Emu3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        # For an image where pixels go from 0 to 255 the diff can be 1 due to some numerical precision errors when scaling and unscaling
        self.assertTrue(np.abs(orig_image - unnormalized_images).max() >= 1)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/evolla/test_processing_evolla.py
+++ b/tests/models/evolla/test_processing_evolla.py
--- a/tests/models/flava/test_processing_flava.py
+++ b/tests/models/flava/test_processing_flava.py
--- a/tests/models/fuyu/test_processing_fuyu.py
+++ b/tests/models/fuyu/test_processing_fuyu.py
@@ -64,7 +64,7 @@ class FuyuProcessingTest(ProcessorTesterMixin, unittest.TestCase):
    def get_image_processor(self, **kwargs):
        return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).image_processor
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/gemma3/test_processing_gemma3.py
+++ b/tests/models/gemma3/test_processing_gemma3.py
@@ -58,7 +58,7 @@ class Gemma3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        processor.save_pretrained(cls.tmpdirname)
        cls.image_token = processor.boi_token
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/git/test_processing_git.py
+++ b/tests/models/git/test_processing_git.py
--- a/tests/models/got_ocr2/test_processing_got_ocr2.py
+++ b/tests/models/got_ocr2/test_processing_got_ocr2.py
--- a/tests/models/granite_speech/test_processing_granite_speech.py
+++ b/tests/models/granite_speech/test_processing_granite_speech.py
--- a/tests/models/grounding_dino/test_processing_grounding_dino.py
+++ b/tests/models/grounding_dino/test_processing_grounding_dino.py
@@ -94,17 +94,17 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        return [labels, labels_longer] + [labels] * (batch_size - 2)
    @classmethod
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_tokenizer with CLIP->Bert
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_tokenizer with CLIP->Bert
    def get_tokenizer(cls, **kwargs):
        return BertTokenizer.from_pretrained(cls.tmpdirname, **kwargs)
    @classmethod
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_rust_tokenizer with CLIP->Bert
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_rust_tokenizer with CLIP->Bert
    def get_rust_tokenizer(cls, **kwargs):
        return BertTokenizerFast.from_pretrained(cls.tmpdirname, **kwargs)
    @classmethod
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_image_processor with CLIP->GroundingDino
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_image_processor with CLIP->GroundingDino
    def get_image_processor(cls, **kwargs):
        return GroundingDinoImageProcessor.from_pretrained(cls.tmpdirname, **kwargs)
@@ -145,7 +145,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        expected_box_slice = torch.tensor([0.6908, 0.4354, 1.0737, 1.3947])
        torch.testing.assert_close(post_processed[0]["boxes"][0], expected_box_slice, rtol=1e-4, atol=1e-4)
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_save_load_pretrained_default with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_save_load_pretrained_default with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
    def test_save_load_pretrained_default(self):
        tokenizer_slow = self.get_tokenizer()
        tokenizer_fast = self.get_rust_tokenizer()
@@ -171,7 +171,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertIsInstance(processor_slow.image_processor, GroundingDinoImageProcessor)
        self.assertIsInstance(processor_fast.image_processor, GroundingDinoImageProcessor)
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_save_load_pretrained_additional_features with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_save_load_pretrained_additional_features with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
    def test_save_load_pretrained_additional_features(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            processor = GroundingDinoProcessor(
@@ -194,7 +194,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertEqual(processor.image_processor.to_json_string(), image_processor_add_kwargs.to_json_string())
        self.assertIsInstance(processor.image_processor, GroundingDinoImageProcessor)
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_image_processor with CLIP->GroundingDino
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_image_processor with CLIP->GroundingDino
    def test_image_processor(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
@@ -209,7 +209,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        for key in input_image_proc:
            self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2)
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_tokenizer with CLIP->GroundingDino
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_tokenizer with CLIP->GroundingDino
    def test_tokenizer(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
@@ -244,7 +244,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        with pytest.raises(ValueError):
            processor()
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_tokenizer_decode with CLIP->GroundingDino
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_tokenizer_decode with CLIP->GroundingDino
    def test_tokenizer_decode(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
@@ -258,7 +258,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertListEqual(decoded_tok, decoded_processor)
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_model_input_names with CLIP->GroundingDino
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_model_input_names with CLIP->GroundingDino
    def test_model_input_names(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
--- a/tests/models/idefics/test_processing_idefics.py
+++ b/tests/models/idefics/test_processing_idefics.py
--- a/tests/models/idefics2/test_processing_idefics2.py
+++ b/tests/models/idefics2/test_processing_idefics2.py
--- a/tests/models/idefics3/test_processing_idefics3.py
+++ b/tests/models/idefics3/test_processing_idefics3.py
@@ -84,7 +84,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def prepare_processor_dict():
        return {"image_seq_len": 2}
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
@@ -284,7 +284,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertEqual(np.array(inputs["pixel_values"]).shape, (2, 2, 3, 364, 364))
        self.assertEqual(np.array(inputs["pixel_attention_mask"]).shape, (2, 2, 364, 364))
-    # Copied from tests.models.idefics2.test_processor_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
+    # Copied from tests.models.idefics2.test_processing_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
    def test_process_interleaved_images_prompts_image_error(self):
        processor = self.get_processor()
--- a/tests/models/instructblip/test_processing_instructblip.py
+++ b/tests/models/instructblip/test_processing_instructblip.py
--- a/tests/models/instructblipvideo/test_processing_instructblipvideo.py
+++ b/tests/models/instructblipvideo/test_processing_instructblipvideo.py
--- a/tests/models/internvl/test_processing_internvl.py
+++ b/tests/models/internvl/test_processing_internvl.py
@@ -97,7 +97,7 @@ class InternVLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/internvl/test_video_processing_internvl.py
+++ b/tests/models/internvl/test_video_processing_internvl.py
--- a/tests/models/janus/test_processing_janus.py
+++ b/tests/models/janus/test_processing_janus.py
--- a/tests/models/kosmos2/test_processing_kosmos2.py
+++ b/tests/models/kosmos2/test_processing_kosmos2.py
--- a/tests/models/layoutlmv2/test_processing_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_processing_layoutlmv2.py
--- a/tests/models/layoutlmv3/test_processing_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_processing_layoutlmv3.py
--- a/tests/models/layoutxlm/test_processing_layoutxlm.py
+++ b/tests/models/layoutxlm/test_processing_layoutxlm.py
--- a/tests/models/llama4/test_processing_llama4.py
+++ b/tests/models/llama4/test_processing_llama4.py
--- a/tests/models/llava/test_processing_llava.py
+++ b/tests/models/llava/test_processing_llava.py
--- a/tests/models/llava_next/test_processing_llava_next.py
+++ b/tests/models/llava_next/test_processing_llava_next.py
@@ -66,7 +66,7 @@ class LlavaNextProcessorTest(ProcessorTesterMixin, unittest.TestCase):
            "vision_feature_select_strategy": "default"
        }  # fmt: skip
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
@@ -79,7 +79,7 @@ class LlavaNextProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertTrue("num_image_patches" in output)
        self.assertEqual(len(output["num_image_patches"]), 3)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
    def test_chat_template_is_saved(self):
        processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
        processor_dict_loaded = json.loads(processor_loaded.to_json_string())
--- a/tests/models/llava_next_video/test_processing_llava_next_video.py
+++ b/tests/models/llava_next_video/test_processing_llava_next_video.py
@@ -75,7 +75,7 @@ class LlavaNextVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
            "vision_feature_select_strategy": "default",
        }
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
@@ -88,7 +88,7 @@ class LlavaNextVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertTrue("num_image_patches" in output)
        self.assertEqual(len(output["num_image_patches"]), 3)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
    def test_chat_template_is_saved(self):
        processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
        processor_dict_loaded = json.loads(processor_loaded.to_json_string())
--- a/tests/models/llava_onevision/test_processing_llava_onevision.py
+++ b/tests/models/llava_onevision/test_processing_llava_onevision.py
@@ -79,7 +79,7 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
            "vision_feature_select_strategy": "default"
        }  # fmt: skip
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
@@ -92,7 +92,7 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertTrue("num_image_patches" in output)
        self.assertEqual(len(output["num_image_patches"]), 3)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
    def test_chat_template_is_saved(self):
        processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
        processor_dict_loaded = json.loads(processor_loaded.to_json_string())
--- a/tests/models/markuplm/test_processing_markuplm.py
+++ b/tests/models/markuplm/test_processing_markuplm.py
--- a/tests/models/mgp_str/test_processing_mgp_str.py
+++ b/tests/models/mgp_str/test_processing_mgp_str.py
--- a/tests/models/mistral3/test_processing_mistral3.py
+++ b/tests/models/mistral3/test_processing_mistral3.py
--- a/tests/models/mllama/test_processing_mllama.py
+++ b/tests/models/mllama/test_processing_mllama.py
--- a/tests/models/musicgen/test_processing_musicgen.py
+++ b/tests/models/musicgen/test_processing_musicgen.py
--- a/tests/models/musicgen_melody/test_processing_musicgen_melody.py
+++ b/tests/models/musicgen_melody/test_processing_musicgen_melody.py
@@ -50,7 +50,7 @@ def floats_list(shape, scale=1.0, rng=None, name=None):
@require_torch
@require_sentencepiece
@require_torchaudio
-# Copied from tests.models.musicgen.test_processor_musicgen.MusicgenProcessorTest with Musicgen->MusicgenMelody, Encodec->MusicgenMelody, padding_mask->attention_mask, input_values->input_features
+# Copied from tests.models.musicgen.test_processing_musicgen.MusicgenProcessorTest with Musicgen->MusicgenMelody, Encodec->MusicgenMelody, padding_mask->attention_mask, input_values->input_features
 class MusicgenMelodyProcessorTest(unittest.TestCase):
    def setUp(self):
        # Ignore copy
--- a/tests/models/omdet_turbo/test_processing_omdet_turbo.py
+++ b/tests/models/omdet_turbo/test_processing_omdet_turbo.py
--- a/tests/models/oneformer/test_processing_oneformer.py
+++ b/tests/models/oneformer/test_processing_oneformer.py
--- a/tests/models/owlv2/test_processing_owlv2.py
+++ b/tests/models/owlv2/test_processing_owlv2.py
--- a/tests/models/owlvit/test_processing_owlvit.py
+++ b/tests/models/owlvit/test_processing_owlvit.py
--- a/tests/models/paligemma/test_processing_paligemma.py
+++ b/tests/models/paligemma/test_processing_paligemma.py
@@ -48,7 +48,7 @@ class PaliGemmaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/perception_lm/test_processing_perception_lm.py
+++ b/tests/models/perception_lm/test_processing_perception_lm.py
--- a/tests/models/phi4_multimodal/test_feature_extraction_phi4_multimodal.py
+++ b/tests/models/phi4_multimodal/test_feature_extraction_phi4_multimodal.py
--- a/tests/models/pix2struct/test_processing_pix2struct.py
+++ b/tests/models/pix2struct/test_processing_pix2struct.py
--- a/tests/models/pixtral/test_processing_pixtral.py
+++ b/tests/models/pixtral/test_processing_pixtral.py
--- a/tests/models/pop2piano/test_processing_pop2piano.py
+++ b/tests/models/pop2piano/test_processing_pop2piano.py
--- a/tests/models/qwen2_5_omni/test_processing_qwen2_5_omni.py
+++ b/tests/models/qwen2_5_omni/test_processing_qwen2_5_omni.py
--- a/tests/models/qwen2_5_vl/test_processing_qwen2_5_vl.py
+++ b/tests/models/qwen2_5_vl/test_processing_qwen2_5_vl.py
@@ -65,7 +65,7 @@ class Qwen2_5_VLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/qwen2_audio/test_processing_qwen2_audio.py
+++ b/tests/models/qwen2_audio/test_processing_qwen2_audio.py
--- a/tests/models/qwen2_vl/test_processing_qwen2_vl.py
+++ b/tests/models/qwen2_vl/test_processing_qwen2_vl.py
@@ -68,7 +68,7 @@ class Qwen2VLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)
-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"
--- a/tests/models/sam/test_processing_sam.py
+++ b/tests/models/sam/test_processing_sam.py
--- a/tests/models/sam_hq/test_processing_samhq.py
+++ b/tests/models/sam_hq/test_processing_samhq.py
--- a/tests/models/seamless_m4t/test_processing_seamless_m4t.py
+++ b/tests/models/seamless_m4t/test_processing_seamless_m4t.py
@@ -76,7 +76,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
        tokenizer_instance = isinstance(processor.tokenizer, (SeamlessM4TTokenizerFast, SeamlessM4TTokenizer))
        self.assertTrue(tokenizer_instance)
-    # Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->SeamlessM4T
+    # Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->SeamlessM4T
    def test_feature_extractor(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
@@ -91,7 +91,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
        for key in input_feat_extract:
            self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)
-    # Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->SeamlessM4T
+    # Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer with Whisper->SeamlessM4T
    def test_tokenizer(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
@@ -107,7 +107,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
        for key in encoded_tok:
            self.assertListEqual(encoded_tok[key], encoded_processor[key])
-    # Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->SeamlessM4T
+    # Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->SeamlessM4T
    def test_tokenizer_decode(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
--- a/tests/models/smolvlm/test_processing_smolvlm.py
+++ b/tests/models/smolvlm/test_processing_smolvlm.py
@@ -297,7 +297,7 @@ class SmolVLMProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertEqual(np.array(inputs["pixel_values"]).shape, (2, 2, 3, 512, 512))
        self.assertEqual(np.array(inputs["pixel_attention_mask"]).shape, (2, 2, 512, 512))
-    # Copied from tests.models.idefics2.test_processor_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
+    # Copied from tests.models.idefics2.test_processing_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
    def test_process_interleaved_images_prompts_image_error(self):
        processor = self.get_processor()
--- a/tests/models/speech_to_text/test_processing_speech_to_text.py
+++ b/tests/models/speech_to_text/test_processing_speech_to_text.py
--- a/tests/models/speecht5/test_processing_speecht5.py
+++ b/tests/models/speecht5/test_processing_speecht5.py
--- a/tests/models/trocr/test_processing_trocr.py
+++ b/tests/models/trocr/test_processing_trocr.py
--- a/tests/models/udop/test_processing_udop.py
+++ b/tests/models/udop/test_processing_udop.py
--- a/tests/models/vision_text_dual_encoder/test_processing_vision_text_dual_encoder.py
+++ b/tests/models/vision_text_dual_encoder/test_processing_vision_text_dual_encoder.py
--- a/tests/models/wav2vec2/test_processing_wav2vec2.py
+++ b/tests/models/wav2vec2/test_processing_wav2vec2.py
--- a/tests/models/wav2vec2_bert/test_processing_wav2vec2_bert.py
+++ b/tests/models/wav2vec2_bert/test_processing_wav2vec2_bert.py
--- a/tests/models/wav2vec2_with_lm/test_processing_wav2vec2_with_lm.py
+++ b/tests/models/wav2vec2_with_lm/test_processing_wav2vec2_with_lm.py
--- a/tests/models/whisper/test_processing_whisper.py
+++ b/tests/models/whisper/test_processing_whisper.py
--- a/tests/utils/test_add_new_model_like.py
+++ b/tests/utils/test_add_new_model_like.py
--- a/utils/modular_model_converter.py
+++ b/utils/modular_model_converter.py
@@ -17,12 +17,12 @@ import glob
 import importlib
 import os
 import re
 import subprocess
 from abc import ABC, abstractmethod
 from collections import Counter, defaultdict, deque
 from typing import Optional, Union
 import libcst as cst
 from check_copies import run_ruff
 from create_dependency_mapping import find_priority_list
 from libcst import ClassDef, CSTVisitor
 from libcst import matchers as m
@@ -1676,6 +1676,16 @@ def create_modules(modular_mapper: ModularFileMapper) -> dict[str, cst.Module]:
    return files
 def run_ruff(code, check=False):
    """Pipe `code` through the `ruff` command-line tool and return what it writes to stdout.

    Args:
        code (`str`): Source text sent to ruff on its standard input.
        check (`bool`, *optional*, defaults to `False`): When `True`, run `ruff check` with `--fix` and
            `--exit-zero`; otherwise run `ruff format` using the `pyproject.toml` config.

    Returns:
        `str`: The decoded stdout of the ruff process. stderr is captured but discarded, and the exit
        code is not inspected.
    """
    ruff_args = (
        ["check", "-", "--fix", "--exit-zero"] if check else ["format", "-", "--config", "pyproject.toml", "--silent"]
    )
    # The "-" argument makes ruff read the source from stdin rather than from a file on disk.
    completed = subprocess.run(
        ["ruff", *ruff_args], input=code.encode(), stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    return completed.stdout.decode()
 def convert_modular_file(modular_file):
    pattern = re.search(r"modular_(.*)(?=\.py$)", modular_file)
    output = {}