Rework add-new-model-like with modular and make test filenames coherent (#39612)

* remove tf/flax * fix * style * Update add_new_model_like.py * work in progress * continue * more cleanup * simplify and first final version * fixes -> it works * add linter checks * Update add_new_model_like.py * fix * add modular conversion at the end * Update add_new_model_like.py * add video processor * Update add_new_model_like.py * Update add_new_model_like.py * Update add_new_model_like.py * fix * Update image_processing_auto.py * Update image_processing_auto.py * fix post rebase * start test filenames replacement * rename all test_processor -> test_processing * fix copied from * add docstrings * Update add_new_model_like.py * fix regex * improve wording * Update add_new_model_like.py * Update add_new_model_like.py * Update add_new_model_like.py * start adding test * fix * fix * proper first test * tests * fix * fix * fix * fix * modular can be used from anywhere * protect import * fix * Update add_new_model_like.py * fix
2025-08-04 14:41:09 +02:00
parent 5fb5b6cfaf
commit 380b2a0317
87 changed files with 1438 additions and 2869 deletions
--- a/conftest.py
+++ b/conftest.py
@@ -29,7 +29,6 @@ from transformers.testing_utils import HfDoctestModule, HfDocTestParser
 NOT_DEVICE_TESTS = {
    "test_tokenization",
    "test_tokenization_mistral_common",
-    "test_processor",
    "test_processing",
    "test_beam_constraints",
    "test_configuration_utils",
--- a/src/transformers/commands/add_new_model_like.py
+++ b/src/transformers/commands/add_new_model_like.py
--- a/src/transformers/models/auto/image_processing_auto.py
+++ b/src/transformers/models/auto/image_processing_auto.py
@@ -62,7 +62,7 @@ else:
            ("aimv2", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("aimv2_vision_model", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("align", ("EfficientNetImageProcessor", "EfficientNetImageProcessorFast")),
-            ("aria", ("AriaImageProcessor")),
+            ("aria", ("AriaImageProcessor", None)),
            ("beit", ("BeitImageProcessor", "BeitImageProcessorFast")),
            ("bit", ("BitImageProcessor", "BitImageProcessorFast")),
            ("blip", ("BlipImageProcessor", "BlipImageProcessorFast")),
@@ -72,7 +72,7 @@ else:
            ("chinese_clip", ("ChineseCLIPImageProcessor", "ChineseCLIPImageProcessorFast")),
            ("clip", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("clipseg", ("ViTImageProcessor", "ViTImageProcessorFast")),
-            ("cohere2_vision", ("Cohere2VisionImageProcessorFast",)),
+            ("cohere2_vision", (None, "Cohere2VisionImageProcessorFast")),
            ("conditional_detr", ("ConditionalDetrImageProcessor", "ConditionalDetrImageProcessorFast")),
            ("convnext", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
            ("convnextv2", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
@@ -84,52 +84,52 @@ else:
            ("deit", ("DeiTImageProcessor", "DeiTImageProcessorFast")),
            ("depth_anything", ("DPTImageProcessor", "DPTImageProcessorFast")),
            ("depth_pro", ("DepthProImageProcessor", "DepthProImageProcessorFast")),
-            ("deta", ("DetaImageProcessor",)),
+            ("deta", ("DetaImageProcessor", None)),
            ("detr", ("DetrImageProcessor", "DetrImageProcessorFast")),
            ("dinat", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("dinov2", ("BitImageProcessor", "BitImageProcessorFast")),
            ("donut-swin", ("DonutImageProcessor", "DonutImageProcessorFast")),
            ("dpt", ("DPTImageProcessor", "DPTImageProcessorFast")),
-            ("efficientformer", ("EfficientFormerImageProcessor",)),
-            ("efficientloftr", ("EfficientLoFTRImageProcessor",)),
+            ("efficientformer", ("EfficientFormerImageProcessor", None)),
+            ("efficientloftr", ("EfficientLoFTRImageProcessor", None)),
            ("efficientnet", ("EfficientNetImageProcessor", "EfficientNetImageProcessorFast")),
            ("eomt", ("EomtImageProcessor", "EomtImageProcessorFast")),
            ("flava", ("FlavaImageProcessor", "FlavaImageProcessorFast")),
            ("focalnet", ("BitImageProcessor", "BitImageProcessorFast")),
-            ("fuyu", ("FuyuImageProcessor",)),
+            ("fuyu", ("FuyuImageProcessor", None)),
            ("gemma3", ("Gemma3ImageProcessor", "Gemma3ImageProcessorFast")),
            ("gemma3n", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
            ("git", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("glm4v", ("Glm4vImageProcessor", "Glm4vImageProcessorFast")),
-            ("glpn", ("GLPNImageProcessor",)),
+            ("glpn", ("GLPNImageProcessor", None)),
            ("got_ocr2", ("GotOcr2ImageProcessor", "GotOcr2ImageProcessorFast")),
            ("grounding-dino", ("GroundingDinoImageProcessor", "GroundingDinoImageProcessorFast")),
            ("groupvit", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("hiera", ("BitImageProcessor", "BitImageProcessorFast")),
-            ("idefics", ("IdeficsImageProcessor",)),
+            ("idefics", ("IdeficsImageProcessor", None)),
            ("idefics2", ("Idefics2ImageProcessor", "Idefics2ImageProcessorFast")),
            ("idefics3", ("Idefics3ImageProcessor", "Idefics3ImageProcessorFast")),
            ("ijepa", ("ViTImageProcessor", "ViTImageProcessorFast")),
-            ("imagegpt", ("ImageGPTImageProcessor",)),
+            ("imagegpt", ("ImageGPTImageProcessor", None)),
            ("instructblip", ("BlipImageProcessor", "BlipImageProcessorFast")),
-            ("instructblipvideo", ("InstructBlipVideoImageProcessor",)),
+            ("instructblipvideo", ("InstructBlipVideoImageProcessor", None)),
            ("janus", ("JanusImageProcessor", "JanusImageProcessorFast")),
            ("kosmos-2", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("layoutlmv2", ("LayoutLMv2ImageProcessor", "LayoutLMv2ImageProcessorFast")),
            ("layoutlmv3", ("LayoutLMv3ImageProcessor", "LayoutLMv3ImageProcessorFast")),
            ("levit", ("LevitImageProcessor", "LevitImageProcessorFast")),
-            ("lightglue", ("LightGlueImageProcessor",)),
+            ("lightglue", ("LightGlueImageProcessor", None)),
            ("llama4", ("Llama4ImageProcessor", "Llama4ImageProcessorFast")),
            ("llava", ("LlavaImageProcessor", "LlavaImageProcessorFast")),
            ("llava_next", ("LlavaNextImageProcessor", "LlavaNextImageProcessorFast")),
-            ("llava_next_video", ("LlavaNextVideoImageProcessor",)),
+            ("llava_next_video", ("LlavaNextVideoImageProcessor", None)),
            ("llava_onevision", ("LlavaOnevisionImageProcessor", "LlavaOnevisionImageProcessorFast")),
            ("mask2former", ("Mask2FormerImageProcessor", "Mask2FormerImageProcessorFast")),
            ("maskformer", ("MaskFormerImageProcessor", "MaskFormerImageProcessorFast")),
            ("mgp-str", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("mistral3", ("PixtralImageProcessor", "PixtralImageProcessorFast")),
            ("mlcd", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
-            ("mllama", ("MllamaImageProcessor",)),
+            ("mllama", ("MllamaImageProcessor", None)),
            ("mm-grounding-dino", ("GroundingDinoImageProcessor", "GroundingDinoImageProcessorFast")),
            ("mobilenet_v1", ("MobileNetV1ImageProcessor", "MobileNetV1ImageProcessorFast")),
            ("mobilenet_v2", ("MobileNetV2ImageProcessor", "MobileNetV2ImageProcessorFast")),
@@ -142,12 +142,12 @@ else:
            ("owlvit", ("OwlViTImageProcessor", "OwlViTImageProcessorFast")),
            ("paligemma", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
            ("perceiver", ("PerceiverImageProcessor", "PerceiverImageProcessorFast")),
-            ("perception_lm", ("PerceptionLMImageProcessorFast",)),
-            ("phi4_multimodal", ("Phi4MultimodalImageProcessorFast",)),
-            ("pix2struct", ("Pix2StructImageProcessor",)),
+            ("perception_lm", (None, "PerceptionLMImageProcessorFast")),
+            ("phi4_multimodal", (None, "Phi4MultimodalImageProcessorFast")),
+            ("pix2struct", ("Pix2StructImageProcessor", None)),
            ("pixtral", ("PixtralImageProcessor", "PixtralImageProcessorFast")),
            ("poolformer", ("PoolFormerImageProcessor", "PoolFormerImageProcessorFast")),
-            ("prompt_depth_anything", ("PromptDepthAnythingImageProcessor",)),
+            ("prompt_depth_anything", ("PromptDepthAnythingImageProcessor", None)),
            ("pvt", ("PvtImageProcessor", "PvtImageProcessorFast")),
            ("pvt_v2", ("PvtImageProcessor", "PvtImageProcessorFast")),
            ("qwen2_5_vl", ("Qwen2VLImageProcessor", "Qwen2VLImageProcessorFast")),
@@ -157,39 +157,31 @@ else:
            ("rt_detr", ("RTDetrImageProcessor", "RTDetrImageProcessorFast")),
            ("sam", ("SamImageProcessor", "SamImageProcessorFast")),
            ("sam_hq", ("SamImageProcessor", "SamImageProcessorFast")),
-            ("segformer", ("SegformerImageProcessor",)),
            ("segformer", ("SegformerImageProcessor", "SegformerImageProcessorFast")),
-            ("seggpt", ("SegGptImageProcessor",)),
+            ("seggpt", ("SegGptImageProcessor", None)),
            ("shieldgemma2", ("Gemma3ImageProcessor", "Gemma3ImageProcessorFast")),
            ("siglip", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
            ("siglip2", ("Siglip2ImageProcessor", "Siglip2ImageProcessorFast")),
            ("smolvlm", ("SmolVLMImageProcessor", "SmolVLMImageProcessorFast")),
-            ("superglue", ("SuperGlueImageProcessor",)),
-            (
-                "superpoint",
-                (
-                    "SuperPointImageProcessor",
-                    "SuperPointImageProcessorFast",
-                ),
-            ),
+            ("superglue", ("SuperGlueImageProcessor", None)),
+            ("superpoint", ("SuperPointImageProcessor", "SuperPointImageProcessorFast")),
            ("swiftformer", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("swin", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("swin2sr", ("Swin2SRImageProcessor", "Swin2SRImageProcessorFast")),
            ("swinv2", ("ViTImageProcessor", "ViTImageProcessorFast")),
-            ("table-transformer", ("DetrImageProcessor",)),
-            ("timesformer", ("VideoMAEImageProcessor",)),
-            ("timm_wrapper", ("TimmWrapperImageProcessor",)),
-            ("tvlt", ("TvltImageProcessor",)),
-            ("tvp", ("TvpImageProcessor",)),
+            ("table-transformer", ("DetrImageProcessor", None)),
+            ("timesformer", ("VideoMAEImageProcessor", None)),
+            ("timm_wrapper", ("TimmWrapperImageProcessor", None)),
+            ("tvlt", ("TvltImageProcessor", None)),
+            ("tvp", ("TvpImageProcessor", None)),
            ("udop", ("LayoutLMv3ImageProcessor", "LayoutLMv3ImageProcessorFast")),
-            ("udop", ("LayoutLMv3ImageProcessor",)),
            ("upernet", ("SegformerImageProcessor", "SegformerImageProcessorFast")),
            ("van", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
-            ("videomae", ("VideoMAEImageProcessor",)),
+            ("videomae", ("VideoMAEImageProcessor", None)),
            ("vilt", ("ViltImageProcessor", "ViltImageProcessorFast")),
            ("vipllava", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
            ("vit", ("ViTImageProcessor", "ViTImageProcessorFast")),
-            ("vit_hybrid", ("ViTHybridImageProcessor",)),
+            ("vit_hybrid", ("ViTHybridImageProcessor", None)),
            ("vit_mae", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("vit_msn", ("ViTImageProcessor", "ViTImageProcessorFast")),
            ("vitmatte", ("VitMatteImageProcessor", "VitMatteImageProcessorFast")),
@@ -199,18 +191,14 @@ else:
        ]
    )

-for model_type, image_processors in IMAGE_PROCESSOR_MAPPING_NAMES.items():
-    slow_image_processor_class, *fast_image_processor_class = image_processors
+# Override to None if the packages are not available
+for model_type, (slow_class, fast_class) in IMAGE_PROCESSOR_MAPPING_NAMES.items():
    if not is_vision_available():
-        slow_image_processor_class = None
+        slow_class = None
+    if not is_torchvision_available():
+        fast_class = None

-    # If the fast image processor is not defined, or torchvision is not available, we set it to None
-    if not fast_image_processor_class or fast_image_processor_class[0] is None or not is_torchvision_available():
-        fast_image_processor_class = None
-    else:
-        fast_image_processor_class = fast_image_processor_class[0]
-
-    IMAGE_PROCESSOR_MAPPING_NAMES[model_type] = (slow_image_processor_class, fast_image_processor_class)
+    IMAGE_PROCESSOR_MAPPING_NAMES[model_type] = (slow_class, fast_class)

 IMAGE_PROCESSOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, IMAGE_PROCESSOR_MAPPING_NAMES)

--- a/src/transformers/utils/init.py
+++ b/src/transformers/utils/init.py
@@ -179,6 +179,7 @@ from .import_utils import (
    is_keras_nlp_available,
    is_kernels_available,
    is_levenshtein_available,
+    is_libcst_available,
    is_librosa_available,
    is_liger_kernel_available,
    is_lomo_available,
--- a/src/transformers/utils/import_utils.py
+++ b/src/transformers/utils/import_utils.py
@@ -120,6 +120,7 @@ _vptq_available, _vptq_version = _is_package_available("vptq", return_version=Tr
 _av_available = importlib.util.find_spec("av") is not None
 _decord_available = importlib.util.find_spec("decord") is not None
 _torchcodec_available = importlib.util.find_spec("torchcodec") is not None
+_libcst_available = _is_package_available("libcst")
 _bitsandbytes_available = _is_package_available("bitsandbytes")
 _eetq_available = _is_package_available("eetq")
 _fbgemm_gpu_available = _is_package_available("fbgemm_gpu")
@@ -379,6 +380,10 @@ def is_torch_available():
    return _torch_available


+def is_libcst_available():
+    return _libcst_available
+
+
 def is_accelerate_available(min_version: str = ACCELERATE_MIN_VERSION):
    return _accelerate_available and version.parse(_accelerate_version) >= version.parse(min_version)

--- a/tests/models/align/test_processing_align.py
+++ b/tests/models/align/test_processing_align.py
--- a/tests/models/altclip/test_processing_altclip.py
+++ b/tests/models/altclip/test_processing_altclip.py
--- a/tests/models/aria/test_processing_aria.py
+++ b/tests/models/aria/test_processing_aria.py
@@ -95,7 +95,7 @@ class AriaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/aya_vision/test_processing_aya_vision.py
+++ b/tests/models/aya_vision/test_processing_aya_vision.py
@@ -80,7 +80,7 @@ class AyaVisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/bark/test_processing_bark.py
+++ b/tests/models/bark/test_processing_bark.py
--- a/tests/models/blip/test_processing_blip.py
+++ b/tests/models/blip/test_processing_blip.py
--- a/tests/models/blip_2/test_processing_blip_2.py
+++ b/tests/models/blip_2/test_processing_blip_2.py
--- a/tests/models/bridgetower/test_processing_bridgetower.py
+++ b/tests/models/bridgetower/test_processing_bridgetower.py
--- a/tests/models/chameleon/test_processing_chameleon.py
+++ b/tests/models/chameleon/test_processing_chameleon.py
@@ -75,7 +75,7 @@ class ChameleonProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def prepare_processor_dict():
        return {"image_seq_length": 2}  # fmt: skip

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/chinese_clip/test_processing_chinese_clip.py
+++ b/tests/models/chinese_clip/test_processing_chinese_clip.py
--- a/tests/models/clap/test_processing_clap.py
+++ b/tests/models/clap/test_processing_clap.py
--- a/tests/models/clip/test_processing_clip.py
+++ b/tests/models/clip/test_processing_clip.py
--- a/tests/models/clipseg/test_processing_clipseg.py
+++ b/tests/models/clipseg/test_processing_clipseg.py
--- a/tests/models/clvp/test_processing_clvp.py
+++ b/tests/models/clvp/test_processing_clvp.py
@@ -35,15 +35,15 @@ class ClvpProcessorTest(unittest.TestCase):
        shutil.rmtree(self.tmpdirname)
        gc.collect()

-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.get_tokenizer with Whisper->Clvp
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_tokenizer with Whisper->Clvp
    def get_tokenizer(self, **kwargs):
        return ClvpTokenizer.from_pretrained(self.checkpoint, **kwargs)

-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.get_feature_extractor with Whisper->Clvp
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_feature_extractor with Whisper->Clvp
    def get_feature_extractor(self, **kwargs):
        return ClvpFeatureExtractor.from_pretrained(self.checkpoint, **kwargs)

-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_save_load_pretrained_default with Whisper->Clvp
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_save_load_pretrained_default with Whisper->Clvp
    def test_save_load_pretrained_default(self):
        tokenizer = self.get_tokenizer()
        feature_extractor = self.get_feature_extractor()
@@ -59,7 +59,7 @@ class ClvpProcessorTest(unittest.TestCase):
        self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor.to_json_string())
        self.assertIsInstance(processor.feature_extractor, ClvpFeatureExtractor)

-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->Clvp,processor(raw_speech->processor(raw_speech=raw_speech
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->Clvp,processor(raw_speech->processor(raw_speech=raw_speech
    def test_feature_extractor(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
@@ -74,7 +74,7 @@ class ClvpProcessorTest(unittest.TestCase):
        for key in input_feat_extract:
            self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)

-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp
    def test_tokenizer(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
@@ -90,7 +90,7 @@ class ClvpProcessorTest(unittest.TestCase):
        for key in encoded_tok:
            self.assertListEqual(encoded_tok[key], encoded_processor[key])

-    # Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp
+    # Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp
    def test_tokenizer_decode(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
--- a/tests/models/colpali/test_processing_colpali.py
+++ b/tests/models/colpali/test_processing_colpali.py
@@ -54,7 +54,7 @@ class ColPaliProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/colqwen2/test_processing_colqwen2.py
+++ b/tests/models/colqwen2/test_processing_colqwen2.py
@@ -57,7 +57,7 @@ class ColQwen2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/csm/test_processing_csm.py
+++ b/tests/models/csm/test_processing_csm.py
--- a/tests/models/deepseek_vl/test_processing_deepseek_vl.py
+++ b/tests/models/deepseek_vl/test_processing_deepseek_vl.py
--- a/tests/models/deepseek_vl_hybrid/test_processing_deepseek_vl_hybrid.py
+++ b/tests/models/deepseek_vl_hybrid/test_processing_deepseek_vl_hybrid.py
--- a/tests/models/dia/test_processing_dia.py
+++ b/tests/models/dia/test_processing_dia.py
--- a/tests/models/donut/test_processing_donut.py
+++ b/tests/models/donut/test_processing_donut.py
--- a/tests/models/emu3/test_processing_emu3.py
+++ b/tests/models/emu3/test_processing_emu3.py
@@ -91,7 +91,7 @@ class Emu3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        # For an image where pixels go from 0 to 255 the diff can be 1 due to some numerical precision errors when scaling and unscaling
        self.assertTrue(np.abs(orig_image - unnormalized_images).max() >= 1)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/evolla/test_processing_evolla.py
+++ b/tests/models/evolla/test_processing_evolla.py
--- a/tests/models/flava/test_processing_flava.py
+++ b/tests/models/flava/test_processing_flava.py
--- a/tests/models/fuyu/test_processing_fuyu.py
+++ b/tests/models/fuyu/test_processing_fuyu.py
@@ -64,7 +64,7 @@ class FuyuProcessingTest(ProcessorTesterMixin, unittest.TestCase):
    def get_image_processor(self, **kwargs):
        return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).image_processor

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/gemma3/test_processing_gemma3.py
+++ b/tests/models/gemma3/test_processing_gemma3.py
@@ -58,7 +58,7 @@ class Gemma3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        processor.save_pretrained(cls.tmpdirname)
        cls.image_token = processor.boi_token

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/git/test_processing_git.py
+++ b/tests/models/git/test_processing_git.py
--- a/tests/models/got_ocr2/test_processing_got_ocr2.py
+++ b/tests/models/got_ocr2/test_processing_got_ocr2.py
--- a/tests/models/granite_speech/test_processing_granite_speech.py
+++ b/tests/models/granite_speech/test_processing_granite_speech.py
--- a/tests/models/grounding_dino/test_processing_grounding_dino.py
+++ b/tests/models/grounding_dino/test_processing_grounding_dino.py
@@ -94,17 +94,17 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        return [labels, labels_longer] + [labels] * (batch_size - 2)

    @classmethod
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_tokenizer with CLIP->Bert
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_tokenizer with CLIP->Bert
    def get_tokenizer(cls, **kwargs):
        return BertTokenizer.from_pretrained(cls.tmpdirname, **kwargs)

    @classmethod
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_rust_tokenizer with CLIP->Bert
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_rust_tokenizer with CLIP->Bert
    def get_rust_tokenizer(cls, **kwargs):
        return BertTokenizerFast.from_pretrained(cls.tmpdirname, **kwargs)

    @classmethod
-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_image_processor with CLIP->GroundingDino
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_image_processor with CLIP->GroundingDino
    def get_image_processor(cls, **kwargs):
        return GroundingDinoImageProcessor.from_pretrained(cls.tmpdirname, **kwargs)

@@ -145,7 +145,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        expected_box_slice = torch.tensor([0.6908, 0.4354, 1.0737, 1.3947])
        torch.testing.assert_close(post_processed[0]["boxes"][0], expected_box_slice, rtol=1e-4, atol=1e-4)

-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_save_load_pretrained_default with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_save_load_pretrained_default with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
    def test_save_load_pretrained_default(self):
        tokenizer_slow = self.get_tokenizer()
        tokenizer_fast = self.get_rust_tokenizer()
@@ -171,7 +171,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertIsInstance(processor_slow.image_processor, GroundingDinoImageProcessor)
        self.assertIsInstance(processor_fast.image_processor, GroundingDinoImageProcessor)

-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_save_load_pretrained_additional_features with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_save_load_pretrained_additional_features with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
    def test_save_load_pretrained_additional_features(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            processor = GroundingDinoProcessor(
@@ -194,7 +194,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertEqual(processor.image_processor.to_json_string(), image_processor_add_kwargs.to_json_string())
        self.assertIsInstance(processor.image_processor, GroundingDinoImageProcessor)

-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_image_processor with CLIP->GroundingDino
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_image_processor with CLIP->GroundingDino
    def test_image_processor(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
@@ -209,7 +209,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        for key in input_image_proc:
            self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2)

-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_tokenizer with CLIP->GroundingDino
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_tokenizer with CLIP->GroundingDino
    def test_tokenizer(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
@@ -244,7 +244,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        with pytest.raises(ValueError):
            processor()

-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_tokenizer_decode with CLIP->GroundingDino
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_tokenizer_decode with CLIP->GroundingDino
    def test_tokenizer_decode(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
@@ -258,7 +258,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):

        self.assertListEqual(decoded_tok, decoded_processor)

-    # Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_model_input_names with CLIP->GroundingDino
+    # Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_model_input_names with CLIP->GroundingDino
    def test_model_input_names(self):
        image_processor = self.get_image_processor()
        tokenizer = self.get_tokenizer()
--- a/tests/models/idefics/test_processing_idefics.py
+++ b/tests/models/idefics/test_processing_idefics.py
--- a/tests/models/idefics2/test_processing_idefics2.py
+++ b/tests/models/idefics2/test_processing_idefics2.py
--- a/tests/models/idefics3/test_processing_idefics3.py
+++ b/tests/models/idefics3/test_processing_idefics3.py
@@ -84,7 +84,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def prepare_processor_dict():
        return {"image_seq_len": 2}

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

@@ -284,7 +284,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertEqual(np.array(inputs["pixel_values"]).shape, (2, 2, 3, 364, 364))
        self.assertEqual(np.array(inputs["pixel_attention_mask"]).shape, (2, 2, 364, 364))

-    # Copied from tests.models.idefics2.test_processor_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
+    # Copied from tests.models.idefics2.test_processing_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
    def test_process_interleaved_images_prompts_image_error(self):
        processor = self.get_processor()

--- a/tests/models/instructblip/test_processing_instructblip.py
+++ b/tests/models/instructblip/test_processing_instructblip.py
--- a/tests/models/instructblipvideo/test_processing_instructblipvideo.py
+++ b/tests/models/instructblipvideo/test_processing_instructblipvideo.py
--- a/tests/models/internvl/test_processing_internvl.py
+++ b/tests/models/internvl/test_processing_internvl.py
@@ -97,7 +97,7 @@ class InternVLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/internvl/test_video_processing_internvl.py
+++ b/tests/models/internvl/test_video_processing_internvl.py
--- a/tests/models/janus/test_processing_janus.py
+++ b/tests/models/janus/test_processing_janus.py
--- a/tests/models/kosmos2/test_processing_kosmos2.py
+++ b/tests/models/kosmos2/test_processing_kosmos2.py
--- a/tests/models/layoutlmv2/test_processing_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_processing_layoutlmv2.py
--- a/tests/models/layoutlmv3/test_processing_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_processing_layoutlmv3.py
--- a/tests/models/layoutxlm/test_processing_layoutxlm.py
+++ b/tests/models/layoutxlm/test_processing_layoutxlm.py
--- a/tests/models/llama4/test_processing_llama4.py
+++ b/tests/models/llama4/test_processing_llama4.py
--- a/tests/models/llava/test_processing_llava.py
+++ b/tests/models/llava/test_processing_llava.py
--- a/tests/models/llava_next/test_processing_llava_next.py
+++ b/tests/models/llava_next/test_processing_llava_next.py
@@ -66,7 +66,7 @@ class LlavaNextProcessorTest(ProcessorTesterMixin, unittest.TestCase):
            "vision_feature_select_strategy": "default"
        }  # fmt: skip

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

@@ -79,7 +79,7 @@ class LlavaNextProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertTrue("num_image_patches" in output)
        self.assertEqual(len(output["num_image_patches"]), 3)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
    def test_chat_template_is_saved(self):
        processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
        processor_dict_loaded = json.loads(processor_loaded.to_json_string())
--- a/tests/models/llava_next_video/test_processing_llava_next_video.py
+++ b/tests/models/llava_next_video/test_processing_llava_next_video.py
@@ -75,7 +75,7 @@ class LlavaNextVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
            "vision_feature_select_strategy": "default",
        }

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

@@ -88,7 +88,7 @@ class LlavaNextVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertTrue("num_image_patches" in output)
        self.assertEqual(len(output["num_image_patches"]), 3)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
    def test_chat_template_is_saved(self):
        processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
        processor_dict_loaded = json.loads(processor_loaded.to_json_string())
--- a/tests/models/llava_onevision/test_processing_llava_onevision.py
+++ b/tests/models/llava_onevision/test_processing_llava_onevision.py
@@ -79,7 +79,7 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
            "vision_feature_select_strategy": "default"
        }  # fmt: skip

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

@@ -92,7 +92,7 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertTrue("num_image_patches" in output)
        self.assertEqual(len(output["num_image_patches"]), 3)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
    def test_chat_template_is_saved(self):
        processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
        processor_dict_loaded = json.loads(processor_loaded.to_json_string())
--- a/tests/models/markuplm/test_processing_markuplm.py
+++ b/tests/models/markuplm/test_processing_markuplm.py
--- a/tests/models/mgp_str/test_processing_mgp_str.py
+++ b/tests/models/mgp_str/test_processing_mgp_str.py
--- a/tests/models/mistral3/test_processing_mistral3.py
+++ b/tests/models/mistral3/test_processing_mistral3.py
--- a/tests/models/mllama/test_processing_mllama.py
+++ b/tests/models/mllama/test_processing_mllama.py
--- a/tests/models/musicgen/test_processing_musicgen.py
+++ b/tests/models/musicgen/test_processing_musicgen.py
--- a/tests/models/musicgen_melody/test_processing_musicgen_melody.py
+++ b/tests/models/musicgen_melody/test_processing_musicgen_melody.py
@@ -50,7 +50,7 @@ def floats_list(shape, scale=1.0, rng=None, name=None):
@require_torch
@require_sentencepiece
@require_torchaudio
-# Copied from tests.models.musicgen.test_processor_musicgen.MusicgenProcessorTest with Musicgen->MusicgenMelody, Encodec->MusicgenMelody, padding_mask->attention_mask, input_values->input_features
+# Copied from tests.models.musicgen.test_processing_musicgen.MusicgenProcessorTest with Musicgen->MusicgenMelody, Encodec->MusicgenMelody, padding_mask->attention_mask, input_values->input_features
 class MusicgenMelodyProcessorTest(unittest.TestCase):
    def setUp(self):
        # Ignore copy
--- a/tests/models/omdet_turbo/test_processing_omdet_turbo.py
+++ b/tests/models/omdet_turbo/test_processing_omdet_turbo.py
--- a/tests/models/oneformer/test_processing_oneformer.py
+++ b/tests/models/oneformer/test_processing_oneformer.py
--- a/tests/models/owlv2/test_processing_owlv2.py
+++ b/tests/models/owlv2/test_processing_owlv2.py
--- a/tests/models/owlvit/test_processing_owlvit.py
+++ b/tests/models/owlvit/test_processing_owlvit.py
--- a/tests/models/paligemma/test_processing_paligemma.py
+++ b/tests/models/paligemma/test_processing_paligemma.py
@@ -48,7 +48,7 @@ class PaliGemmaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/perception_lm/test_processing_perception_lm.py
+++ b/tests/models/perception_lm/test_processing_perception_lm.py
--- a/tests/models/phi4_multimodal/test_feature_extraction_phi4_multimodal.py
+++ b/tests/models/phi4_multimodal/test_feature_extraction_phi4_multimodal.py
--- a/tests/models/pix2struct/test_processing_pix2struct.py
+++ b/tests/models/pix2struct/test_processing_pix2struct.py
--- a/tests/models/pixtral/test_processing_pixtral.py
+++ b/tests/models/pixtral/test_processing_pixtral.py
--- a/tests/models/pop2piano/test_processing_pop2piano.py
+++ b/tests/models/pop2piano/test_processing_pop2piano.py
--- a/tests/models/qwen2_5_omni/test_processing_qwen2_5_omni.py
+++ b/tests/models/qwen2_5_omni/test_processing_qwen2_5_omni.py
--- a/tests/models/qwen2_5_vl/test_processing_qwen2_5_vl.py
+++ b/tests/models/qwen2_5_vl/test_processing_qwen2_5_vl.py
@@ -65,7 +65,7 @@ class Qwen2_5_VLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/qwen2_audio/test_processing_qwen2_audio.py
+++ b/tests/models/qwen2_audio/test_processing_qwen2_audio.py
--- a/tests/models/qwen2_vl/test_processing_qwen2_vl.py
+++ b/tests/models/qwen2_vl/test_processing_qwen2_vl.py
@@ -68,7 +68,7 @@ class Qwen2VLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdirname, ignore_errors=True)

-    # Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
+    # Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
    def test_get_num_vision_tokens(self):
        "Tests general functionality of the helper used internally in vLLM"

--- a/tests/models/sam/test_processing_sam.py
+++ b/tests/models/sam/test_processing_sam.py
--- a/tests/models/sam_hq/test_processing_samhq.py
+++ b/tests/models/sam_hq/test_processing_samhq.py
--- a/tests/models/seamless_m4t/test_processing_seamless_m4t.py
+++ b/tests/models/seamless_m4t/test_processing_seamless_m4t.py
@@ -76,7 +76,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
        tokenizer_instance = isinstance(processor.tokenizer, (SeamlessM4TTokenizerFast, SeamlessM4TTokenizer))
        self.assertTrue(tokenizer_instance)

-    # Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->SeamlessM4T
+    # Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->SeamlessM4T
    def test_feature_extractor(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
@@ -91,7 +91,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
        for key in input_feat_extract:
            self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)

-    # Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->SeamlessM4T
+    # Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer with Whisper->SeamlessM4T
    def test_tokenizer(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
@@ -107,7 +107,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
        for key in encoded_tok:
            self.assertListEqual(encoded_tok[key], encoded_processor[key])

-    # Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->SeamlessM4T
+    # Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->SeamlessM4T
    def test_tokenizer_decode(self):
        feature_extractor = self.get_feature_extractor()
        tokenizer = self.get_tokenizer()
--- a/tests/models/smolvlm/test_processing_smolvlm.py
+++ b/tests/models/smolvlm/test_processing_smolvlm.py
@@ -297,7 +297,7 @@ class SmolVLMProcessorTest(ProcessorTesterMixin, unittest.TestCase):
        self.assertEqual(np.array(inputs["pixel_values"]).shape, (2, 2, 3, 512, 512))
        self.assertEqual(np.array(inputs["pixel_attention_mask"]).shape, (2, 2, 512, 512))

-    # Copied from tests.models.idefics2.test_processor_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
+    # Copied from tests.models.idefics2.test_processing_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
    def test_process_interleaved_images_prompts_image_error(self):
        processor = self.get_processor()

--- a/tests/models/speech_to_text/test_processing_speech_to_text.py
+++ b/tests/models/speech_to_text/test_processing_speech_to_text.py
--- a/tests/models/speecht5/test_processing_speecht5.py
+++ b/tests/models/speecht5/test_processing_speecht5.py
--- a/tests/models/trocr/test_processing_trocr.py
+++ b/tests/models/trocr/test_processing_trocr.py
--- a/tests/models/udop/test_processing_udop.py
+++ b/tests/models/udop/test_processing_udop.py
--- a/tests/models/vision_text_dual_encoder/test_processing_vision_text_dual_encoder.py
+++ b/tests/models/vision_text_dual_encoder/test_processing_vision_text_dual_encoder.py
--- a/tests/models/wav2vec2/test_processing_wav2vec2.py
+++ b/tests/models/wav2vec2/test_processing_wav2vec2.py
--- a/tests/models/wav2vec2_bert/test_processing_wav2vec2_bert.py
+++ b/tests/models/wav2vec2_bert/test_processing_wav2vec2_bert.py
--- a/tests/models/wav2vec2_with_lm/test_processing_wav2vec2_with_lm.py
+++ b/tests/models/wav2vec2_with_lm/test_processing_wav2vec2_with_lm.py
--- a/tests/models/whisper/test_processing_whisper.py
+++ b/tests/models/whisper/test_processing_whisper.py
--- a/tests/utils/test_add_new_model_like.py
+++ b/tests/utils/test_add_new_model_like.py
--- a/utils/modular_model_converter.py
+++ b/utils/modular_model_converter.py
@@ -17,12 +17,12 @@ import glob
 import importlib
 import os
 import re
+import subprocess
 from abc import ABC, abstractmethod
 from collections import Counter, defaultdict, deque
 from typing import Optional, Union

 import libcst as cst
-from check_copies import run_ruff
 from create_dependency_mapping import find_priority_list
 from libcst import ClassDef, CSTVisitor
 from libcst import matchers as m
@@ -1676,6 +1676,16 @@ def create_modules(modular_mapper: ModularFileMapper) -> dict[str, cst.Module]:
    return files


+def run_ruff(code, check=False):
+    if check:
+        command = ["ruff", "check", "-", "--fix", "--exit-zero"]
+    else:
+        command = ["ruff", "format", "-", "--config", "pyproject.toml", "--silent"]
+    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+    stdout, _ = process.communicate(input=code.encode())
+    return stdout.decode()
+
+
 def convert_modular_file(modular_file):
    pattern = re.search(r"modular_(.*)(?=\.py$)", modular_file)
    output = {}