Rework add-new-model-like with modular and make test filenames coherent (#39612)
* remove tf/flax * fix * style * Update add_new_model_like.py * work in progress * continue * more cleanup * simplify and first final version * fixes -> it works * add linter checks * Update add_new_model_like.py * fix * add modular conversion at the end * Update add_new_model_like.py * add video processor * Update add_new_model_like.py * Update add_new_model_like.py * Update add_new_model_like.py * fix * Update image_processing_auto.py * Update image_processing_auto.py * fix post rebase * start test filenames replacement * rename all test_processor -> test_processing * fix copied from * add docstrings * Update add_new_model_like.py * fix regex * improve wording * Update add_new_model_like.py * Update add_new_model_like.py * Update add_new_model_like.py * start adding test * fix * fix * proper first test * tests * fix * fix * fix * fix * modular can be used from anywhere * protect import * fix * Update add_new_model_like.py * fix
This commit is contained in:
@@ -29,7 +29,6 @@ from transformers.testing_utils import HfDoctestModule, HfDocTestParser
|
||||
NOT_DEVICE_TESTS = {
|
||||
"test_tokenization",
|
||||
"test_tokenization_mistral_common",
|
||||
"test_processor",
|
||||
"test_processing",
|
||||
"test_beam_constraints",
|
||||
"test_configuration_utils",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -62,7 +62,7 @@ else:
|
||||
("aimv2", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
|
||||
("aimv2_vision_model", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
|
||||
("align", ("EfficientNetImageProcessor", "EfficientNetImageProcessorFast")),
|
||||
("aria", ("AriaImageProcessor")),
|
||||
("aria", ("AriaImageProcessor", None)),
|
||||
("beit", ("BeitImageProcessor", "BeitImageProcessorFast")),
|
||||
("bit", ("BitImageProcessor", "BitImageProcessorFast")),
|
||||
("blip", ("BlipImageProcessor", "BlipImageProcessorFast")),
|
||||
@@ -72,7 +72,7 @@ else:
|
||||
("chinese_clip", ("ChineseCLIPImageProcessor", "ChineseCLIPImageProcessorFast")),
|
||||
("clip", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
|
||||
("clipseg", ("ViTImageProcessor", "ViTImageProcessorFast")),
|
||||
("cohere2_vision", ("Cohere2VisionImageProcessorFast",)),
|
||||
("cohere2_vision", (None, "Cohere2VisionImageProcessorFast")),
|
||||
("conditional_detr", ("ConditionalDetrImageProcessor", "ConditionalDetrImageProcessorFast")),
|
||||
("convnext", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
|
||||
("convnextv2", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
|
||||
@@ -84,52 +84,52 @@ else:
|
||||
("deit", ("DeiTImageProcessor", "DeiTImageProcessorFast")),
|
||||
("depth_anything", ("DPTImageProcessor", "DPTImageProcessorFast")),
|
||||
("depth_pro", ("DepthProImageProcessor", "DepthProImageProcessorFast")),
|
||||
("deta", ("DetaImageProcessor",)),
|
||||
("deta", ("DetaImageProcessor", None)),
|
||||
("detr", ("DetrImageProcessor", "DetrImageProcessorFast")),
|
||||
("dinat", ("ViTImageProcessor", "ViTImageProcessorFast")),
|
||||
("dinov2", ("BitImageProcessor", "BitImageProcessorFast")),
|
||||
("donut-swin", ("DonutImageProcessor", "DonutImageProcessorFast")),
|
||||
("dpt", ("DPTImageProcessor", "DPTImageProcessorFast")),
|
||||
("efficientformer", ("EfficientFormerImageProcessor",)),
|
||||
("efficientloftr", ("EfficientLoFTRImageProcessor",)),
|
||||
("efficientformer", ("EfficientFormerImageProcessor", None)),
|
||||
("efficientloftr", ("EfficientLoFTRImageProcessor", None)),
|
||||
("efficientnet", ("EfficientNetImageProcessor", "EfficientNetImageProcessorFast")),
|
||||
("eomt", ("EomtImageProcessor", "EomtImageProcessorFast")),
|
||||
("flava", ("FlavaImageProcessor", "FlavaImageProcessorFast")),
|
||||
("focalnet", ("BitImageProcessor", "BitImageProcessorFast")),
|
||||
("fuyu", ("FuyuImageProcessor",)),
|
||||
("fuyu", ("FuyuImageProcessor", None)),
|
||||
("gemma3", ("Gemma3ImageProcessor", "Gemma3ImageProcessorFast")),
|
||||
("gemma3n", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
|
||||
("git", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
|
||||
("glm4v", ("Glm4vImageProcessor", "Glm4vImageProcessorFast")),
|
||||
("glpn", ("GLPNImageProcessor",)),
|
||||
("glpn", ("GLPNImageProcessor", None)),
|
||||
("got_ocr2", ("GotOcr2ImageProcessor", "GotOcr2ImageProcessorFast")),
|
||||
("grounding-dino", ("GroundingDinoImageProcessor", "GroundingDinoImageProcessorFast")),
|
||||
("groupvit", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
|
||||
("hiera", ("BitImageProcessor", "BitImageProcessorFast")),
|
||||
("idefics", ("IdeficsImageProcessor",)),
|
||||
("idefics", ("IdeficsImageProcessor", None)),
|
||||
("idefics2", ("Idefics2ImageProcessor", "Idefics2ImageProcessorFast")),
|
||||
("idefics3", ("Idefics3ImageProcessor", "Idefics3ImageProcessorFast")),
|
||||
("ijepa", ("ViTImageProcessor", "ViTImageProcessorFast")),
|
||||
("imagegpt", ("ImageGPTImageProcessor",)),
|
||||
("imagegpt", ("ImageGPTImageProcessor", None)),
|
||||
("instructblip", ("BlipImageProcessor", "BlipImageProcessorFast")),
|
||||
("instructblipvideo", ("InstructBlipVideoImageProcessor",)),
|
||||
("instructblipvideo", ("InstructBlipVideoImageProcessor", None)),
|
||||
("janus", ("JanusImageProcessor", "JanusImageProcessorFast")),
|
||||
("kosmos-2", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
|
||||
("layoutlmv2", ("LayoutLMv2ImageProcessor", "LayoutLMv2ImageProcessorFast")),
|
||||
("layoutlmv3", ("LayoutLMv3ImageProcessor", "LayoutLMv3ImageProcessorFast")),
|
||||
("levit", ("LevitImageProcessor", "LevitImageProcessorFast")),
|
||||
("lightglue", ("LightGlueImageProcessor",)),
|
||||
("lightglue", ("LightGlueImageProcessor", None)),
|
||||
("llama4", ("Llama4ImageProcessor", "Llama4ImageProcessorFast")),
|
||||
("llava", ("LlavaImageProcessor", "LlavaImageProcessorFast")),
|
||||
("llava_next", ("LlavaNextImageProcessor", "LlavaNextImageProcessorFast")),
|
||||
("llava_next_video", ("LlavaNextVideoImageProcessor",)),
|
||||
("llava_next_video", ("LlavaNextVideoImageProcessor", None)),
|
||||
("llava_onevision", ("LlavaOnevisionImageProcessor", "LlavaOnevisionImageProcessorFast")),
|
||||
("mask2former", ("Mask2FormerImageProcessor", "Mask2FormerImageProcessorFast")),
|
||||
("maskformer", ("MaskFormerImageProcessor", "MaskFormerImageProcessorFast")),
|
||||
("mgp-str", ("ViTImageProcessor", "ViTImageProcessorFast")),
|
||||
("mistral3", ("PixtralImageProcessor", "PixtralImageProcessorFast")),
|
||||
("mlcd", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
|
||||
("mllama", ("MllamaImageProcessor",)),
|
||||
("mllama", ("MllamaImageProcessor", None)),
|
||||
("mm-grounding-dino", ("GroundingDinoImageProcessor", "GroundingDinoImageProcessorFast")),
|
||||
("mobilenet_v1", ("MobileNetV1ImageProcessor", "MobileNetV1ImageProcessorFast")),
|
||||
("mobilenet_v2", ("MobileNetV2ImageProcessor", "MobileNetV2ImageProcessorFast")),
|
||||
@@ -142,12 +142,12 @@ else:
|
||||
("owlvit", ("OwlViTImageProcessor", "OwlViTImageProcessorFast")),
|
||||
("paligemma", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
|
||||
("perceiver", ("PerceiverImageProcessor", "PerceiverImageProcessorFast")),
|
||||
("perception_lm", ("PerceptionLMImageProcessorFast",)),
|
||||
("phi4_multimodal", ("Phi4MultimodalImageProcessorFast",)),
|
||||
("pix2struct", ("Pix2StructImageProcessor",)),
|
||||
("perception_lm", (None, "PerceptionLMImageProcessorFast")),
|
||||
("phi4_multimodal", (None, "Phi4MultimodalImageProcessorFast")),
|
||||
("pix2struct", ("Pix2StructImageProcessor", None)),
|
||||
("pixtral", ("PixtralImageProcessor", "PixtralImageProcessorFast")),
|
||||
("poolformer", ("PoolFormerImageProcessor", "PoolFormerImageProcessorFast")),
|
||||
("prompt_depth_anything", ("PromptDepthAnythingImageProcessor",)),
|
||||
("prompt_depth_anything", ("PromptDepthAnythingImageProcessor", None)),
|
||||
("pvt", ("PvtImageProcessor", "PvtImageProcessorFast")),
|
||||
("pvt_v2", ("PvtImageProcessor", "PvtImageProcessorFast")),
|
||||
("qwen2_5_vl", ("Qwen2VLImageProcessor", "Qwen2VLImageProcessorFast")),
|
||||
@@ -157,39 +157,31 @@ else:
|
||||
("rt_detr", ("RTDetrImageProcessor", "RTDetrImageProcessorFast")),
|
||||
("sam", ("SamImageProcessor", "SamImageProcessorFast")),
|
||||
("sam_hq", ("SamImageProcessor", "SamImageProcessorFast")),
|
||||
("segformer", ("SegformerImageProcessor",)),
|
||||
("segformer", ("SegformerImageProcessor", "SegformerImageProcessorFast")),
|
||||
("seggpt", ("SegGptImageProcessor",)),
|
||||
("seggpt", ("SegGptImageProcessor", None)),
|
||||
("shieldgemma2", ("Gemma3ImageProcessor", "Gemma3ImageProcessorFast")),
|
||||
("siglip", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
|
||||
("siglip2", ("Siglip2ImageProcessor", "Siglip2ImageProcessorFast")),
|
||||
("smolvlm", ("SmolVLMImageProcessor", "SmolVLMImageProcessorFast")),
|
||||
("superglue", ("SuperGlueImageProcessor",)),
|
||||
(
|
||||
"superpoint",
|
||||
(
|
||||
"SuperPointImageProcessor",
|
||||
"SuperPointImageProcessorFast",
|
||||
),
|
||||
),
|
||||
("superglue", ("SuperGlueImageProcessor", None)),
|
||||
("superpoint", ("SuperPointImageProcessor", "SuperPointImageProcessorFast")),
|
||||
("swiftformer", ("ViTImageProcessor", "ViTImageProcessorFast")),
|
||||
("swin", ("ViTImageProcessor", "ViTImageProcessorFast")),
|
||||
("swin2sr", ("Swin2SRImageProcessor", "Swin2SRImageProcessorFast")),
|
||||
("swinv2", ("ViTImageProcessor", "ViTImageProcessorFast")),
|
||||
("table-transformer", ("DetrImageProcessor",)),
|
||||
("timesformer", ("VideoMAEImageProcessor",)),
|
||||
("timm_wrapper", ("TimmWrapperImageProcessor",)),
|
||||
("tvlt", ("TvltImageProcessor",)),
|
||||
("tvp", ("TvpImageProcessor",)),
|
||||
("table-transformer", ("DetrImageProcessor", None)),
|
||||
("timesformer", ("VideoMAEImageProcessor", None)),
|
||||
("timm_wrapper", ("TimmWrapperImageProcessor", None)),
|
||||
("tvlt", ("TvltImageProcessor", None)),
|
||||
("tvp", ("TvpImageProcessor", None)),
|
||||
("udop", ("LayoutLMv3ImageProcessor", "LayoutLMv3ImageProcessorFast")),
|
||||
("udop", ("LayoutLMv3ImageProcessor",)),
|
||||
("upernet", ("SegformerImageProcessor", "SegformerImageProcessorFast")),
|
||||
("van", ("ConvNextImageProcessor", "ConvNextImageProcessorFast")),
|
||||
("videomae", ("VideoMAEImageProcessor",)),
|
||||
("videomae", ("VideoMAEImageProcessor", None)),
|
||||
("vilt", ("ViltImageProcessor", "ViltImageProcessorFast")),
|
||||
("vipllava", ("CLIPImageProcessor", "CLIPImageProcessorFast")),
|
||||
("vit", ("ViTImageProcessor", "ViTImageProcessorFast")),
|
||||
("vit_hybrid", ("ViTHybridImageProcessor",)),
|
||||
("vit_hybrid", ("ViTHybridImageProcessor", None)),
|
||||
("vit_mae", ("ViTImageProcessor", "ViTImageProcessorFast")),
|
||||
("vit_msn", ("ViTImageProcessor", "ViTImageProcessorFast")),
|
||||
("vitmatte", ("VitMatteImageProcessor", "VitMatteImageProcessorFast")),
|
||||
@@ -199,18 +191,14 @@ else:
|
||||
]
|
||||
)
|
||||
|
||||
for model_type, image_processors in IMAGE_PROCESSOR_MAPPING_NAMES.items():
|
||||
slow_image_processor_class, *fast_image_processor_class = image_processors
|
||||
# Override to None if the packages are not available
|
||||
for model_type, (slow_class, fast_class) in IMAGE_PROCESSOR_MAPPING_NAMES.items():
|
||||
if not is_vision_available():
|
||||
slow_image_processor_class = None
|
||||
slow_class = None
|
||||
if not is_torchvision_available():
|
||||
fast_class = None
|
||||
|
||||
# If the fast image processor is not defined, or torchvision is not available, we set it to None
|
||||
if not fast_image_processor_class or fast_image_processor_class[0] is None or not is_torchvision_available():
|
||||
fast_image_processor_class = None
|
||||
else:
|
||||
fast_image_processor_class = fast_image_processor_class[0]
|
||||
|
||||
IMAGE_PROCESSOR_MAPPING_NAMES[model_type] = (slow_image_processor_class, fast_image_processor_class)
|
||||
IMAGE_PROCESSOR_MAPPING_NAMES[model_type] = (slow_class, fast_class)
|
||||
|
||||
IMAGE_PROCESSOR_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, IMAGE_PROCESSOR_MAPPING_NAMES)
|
||||
|
||||
|
||||
@@ -179,6 +179,7 @@ from .import_utils import (
|
||||
is_keras_nlp_available,
|
||||
is_kernels_available,
|
||||
is_levenshtein_available,
|
||||
is_libcst_available,
|
||||
is_librosa_available,
|
||||
is_liger_kernel_available,
|
||||
is_lomo_available,
|
||||
|
||||
@@ -120,6 +120,7 @@ _vptq_available, _vptq_version = _is_package_available("vptq", return_version=Tr
|
||||
_av_available = importlib.util.find_spec("av") is not None
|
||||
_decord_available = importlib.util.find_spec("decord") is not None
|
||||
_torchcodec_available = importlib.util.find_spec("torchcodec") is not None
|
||||
_libcst_available = _is_package_available("libcst")
|
||||
_bitsandbytes_available = _is_package_available("bitsandbytes")
|
||||
_eetq_available = _is_package_available("eetq")
|
||||
_fbgemm_gpu_available = _is_package_available("fbgemm_gpu")
|
||||
@@ -379,6 +380,10 @@ def is_torch_available():
|
||||
return _torch_available
|
||||
|
||||
|
||||
def is_libcst_available():
|
||||
return _libcst_available
|
||||
|
||||
|
||||
def is_accelerate_available(min_version: str = ACCELERATE_MIN_VERSION):
|
||||
return _accelerate_available and version.parse(_accelerate_version) >= version.parse(min_version)
|
||||
|
||||
|
||||
@@ -95,7 +95,7 @@ class AriaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
def tearDownClass(cls):
|
||||
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -80,7 +80,7 @@ class AyaVisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
def tearDownClass(cls):
|
||||
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -75,7 +75,7 @@ class ChameleonProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
def prepare_processor_dict():
|
||||
return {"image_seq_length": 2} # fmt: skip
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -35,15 +35,15 @@ class ClvpProcessorTest(unittest.TestCase):
|
||||
shutil.rmtree(self.tmpdirname)
|
||||
gc.collect()
|
||||
|
||||
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.get_tokenizer with Whisper->Clvp
|
||||
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_tokenizer with Whisper->Clvp
|
||||
def get_tokenizer(self, **kwargs):
|
||||
return ClvpTokenizer.from_pretrained(self.checkpoint, **kwargs)
|
||||
|
||||
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.get_feature_extractor with Whisper->Clvp
|
||||
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.get_feature_extractor with Whisper->Clvp
|
||||
def get_feature_extractor(self, **kwargs):
|
||||
return ClvpFeatureExtractor.from_pretrained(self.checkpoint, **kwargs)
|
||||
|
||||
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_save_load_pretrained_default with Whisper->Clvp
|
||||
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_save_load_pretrained_default with Whisper->Clvp
|
||||
def test_save_load_pretrained_default(self):
|
||||
tokenizer = self.get_tokenizer()
|
||||
feature_extractor = self.get_feature_extractor()
|
||||
@@ -59,7 +59,7 @@ class ClvpProcessorTest(unittest.TestCase):
|
||||
self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor.to_json_string())
|
||||
self.assertIsInstance(processor.feature_extractor, ClvpFeatureExtractor)
|
||||
|
||||
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->Clvp,processor(raw_speech->processor(raw_speech=raw_speech
|
||||
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->Clvp,processor(raw_speech->processor(raw_speech=raw_speech
|
||||
def test_feature_extractor(self):
|
||||
feature_extractor = self.get_feature_extractor()
|
||||
tokenizer = self.get_tokenizer()
|
||||
@@ -74,7 +74,7 @@ class ClvpProcessorTest(unittest.TestCase):
|
||||
for key in input_feat_extract:
|
||||
self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)
|
||||
|
||||
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp
|
||||
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer with Whisper->Clvp
|
||||
def test_tokenizer(self):
|
||||
feature_extractor = self.get_feature_extractor()
|
||||
tokenizer = self.get_tokenizer()
|
||||
@@ -90,7 +90,7 @@ class ClvpProcessorTest(unittest.TestCase):
|
||||
for key in encoded_tok:
|
||||
self.assertListEqual(encoded_tok[key], encoded_processor[key])
|
||||
|
||||
# Copied from transformers.tests.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp
|
||||
# Copied from transformers.tests.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->Clvp
|
||||
def test_tokenizer_decode(self):
|
||||
feature_extractor = self.get_feature_extractor()
|
||||
tokenizer = self.get_tokenizer()
|
||||
@@ -54,7 +54,7 @@ class ColPaliProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
def tearDownClass(cls):
|
||||
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ class ColQwen2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
def tearDownClass(cls):
|
||||
shutil.rmtree(cls.tmpdirname)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
|
||||
@@ -91,7 +91,7 @@ class Emu3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
# For an image where pixels go from 0 to 255 the diff can be 1 due to some numerical precision errors when scaling and unscaling
|
||||
self.assertTrue(np.abs(orig_image - unnormalized_images).max() >= 1)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -64,7 +64,7 @@ class FuyuProcessingTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
def get_image_processor(self, **kwargs):
|
||||
return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).image_processor
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -58,7 +58,7 @@ class Gemma3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
processor.save_pretrained(cls.tmpdirname)
|
||||
cls.image_token = processor.boi_token
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
|
||||
@@ -94,17 +94,17 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
return [labels, labels_longer] + [labels] * (batch_size - 2)
|
||||
|
||||
@classmethod
|
||||
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_tokenizer with CLIP->Bert
|
||||
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_tokenizer with CLIP->Bert
|
||||
def get_tokenizer(cls, **kwargs):
|
||||
return BertTokenizer.from_pretrained(cls.tmpdirname, **kwargs)
|
||||
|
||||
@classmethod
|
||||
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_rust_tokenizer with CLIP->Bert
|
||||
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_rust_tokenizer with CLIP->Bert
|
||||
def get_rust_tokenizer(cls, **kwargs):
|
||||
return BertTokenizerFast.from_pretrained(cls.tmpdirname, **kwargs)
|
||||
|
||||
@classmethod
|
||||
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.get_image_processor with CLIP->GroundingDino
|
||||
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.get_image_processor with CLIP->GroundingDino
|
||||
def get_image_processor(cls, **kwargs):
|
||||
return GroundingDinoImageProcessor.from_pretrained(cls.tmpdirname, **kwargs)
|
||||
|
||||
@@ -145,7 +145,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
expected_box_slice = torch.tensor([0.6908, 0.4354, 1.0737, 1.3947])
|
||||
torch.testing.assert_close(post_processed[0]["boxes"][0], expected_box_slice, rtol=1e-4, atol=1e-4)
|
||||
|
||||
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_save_load_pretrained_default with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
|
||||
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_save_load_pretrained_default with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
|
||||
def test_save_load_pretrained_default(self):
|
||||
tokenizer_slow = self.get_tokenizer()
|
||||
tokenizer_fast = self.get_rust_tokenizer()
|
||||
@@ -171,7 +171,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
self.assertIsInstance(processor_slow.image_processor, GroundingDinoImageProcessor)
|
||||
self.assertIsInstance(processor_fast.image_processor, GroundingDinoImageProcessor)
|
||||
|
||||
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_save_load_pretrained_additional_features with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
|
||||
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_save_load_pretrained_additional_features with CLIP->GroundingDino,GroundingDinoTokenizer->BertTokenizer
|
||||
def test_save_load_pretrained_additional_features(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
processor = GroundingDinoProcessor(
|
||||
@@ -194,7 +194,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
self.assertEqual(processor.image_processor.to_json_string(), image_processor_add_kwargs.to_json_string())
|
||||
self.assertIsInstance(processor.image_processor, GroundingDinoImageProcessor)
|
||||
|
||||
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_image_processor with CLIP->GroundingDino
|
||||
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_image_processor with CLIP->GroundingDino
|
||||
def test_image_processor(self):
|
||||
image_processor = self.get_image_processor()
|
||||
tokenizer = self.get_tokenizer()
|
||||
@@ -209,7 +209,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
for key in input_image_proc:
|
||||
self.assertAlmostEqual(input_image_proc[key].sum(), input_processor[key].sum(), delta=1e-2)
|
||||
|
||||
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_tokenizer with CLIP->GroundingDino
|
||||
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_tokenizer with CLIP->GroundingDino
|
||||
def test_tokenizer(self):
|
||||
image_processor = self.get_image_processor()
|
||||
tokenizer = self.get_tokenizer()
|
||||
@@ -244,7 +244,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
with pytest.raises(ValueError):
|
||||
processor()
|
||||
|
||||
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_tokenizer_decode with CLIP->GroundingDino
|
||||
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_tokenizer_decode with CLIP->GroundingDino
|
||||
def test_tokenizer_decode(self):
|
||||
image_processor = self.get_image_processor()
|
||||
tokenizer = self.get_tokenizer()
|
||||
@@ -258,7 +258,7 @@ class GroundingDinoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
|
||||
self.assertListEqual(decoded_tok, decoded_processor)
|
||||
|
||||
# Copied from tests.models.clip.test_processor_clip.CLIPProcessorTest.test_model_input_names with CLIP->GroundingDino
|
||||
# Copied from tests.models.clip.test_processing_clip.CLIPProcessorTest.test_model_input_names with CLIP->GroundingDino
|
||||
def test_model_input_names(self):
|
||||
image_processor = self.get_image_processor()
|
||||
tokenizer = self.get_tokenizer()
|
||||
@@ -84,7 +84,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
def prepare_processor_dict():
|
||||
return {"image_seq_len": 2}
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -284,7 +284,7 @@ class Idefics3ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
self.assertEqual(np.array(inputs["pixel_values"]).shape, (2, 2, 3, 364, 364))
|
||||
self.assertEqual(np.array(inputs["pixel_attention_mask"]).shape, (2, 2, 364, 364))
|
||||
|
||||
# Copied from tests.models.idefics2.test_processor_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
|
||||
# Copied from tests.models.idefics2.test_processing_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
|
||||
def test_process_interleaved_images_prompts_image_error(self):
|
||||
processor = self.get_processor()
|
||||
|
||||
@@ -97,7 +97,7 @@ class InternVLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
def tearDownClass(cls):
|
||||
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -66,7 +66,7 @@ class LlavaNextProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
"vision_feature_select_strategy": "default"
|
||||
} # fmt: skip
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -79,7 +79,7 @@ class LlavaNextProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
self.assertTrue("num_image_patches" in output)
|
||||
self.assertEqual(len(output["num_image_patches"]), 3)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
|
||||
def test_chat_template_is_saved(self):
|
||||
processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
|
||||
processor_dict_loaded = json.loads(processor_loaded.to_json_string())
|
||||
@@ -75,7 +75,7 @@ class LlavaNextVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
"vision_feature_select_strategy": "default",
|
||||
}
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -88,7 +88,7 @@ class LlavaNextVideoProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
self.assertTrue("num_image_patches" in output)
|
||||
self.assertEqual(len(output["num_image_patches"]), 3)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
|
||||
def test_chat_template_is_saved(self):
|
||||
processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
|
||||
processor_dict_loaded = json.loads(processor_loaded.to_json_string())
|
||||
@@ -79,7 +79,7 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
"vision_feature_select_strategy": "default"
|
||||
} # fmt: skip
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -92,7 +92,7 @@ class LlavaOnevisionProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
self.assertTrue("num_image_patches" in output)
|
||||
self.assertEqual(len(output["num_image_patches"]), 3)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_chat_template_is_saved
|
||||
def test_chat_template_is_saved(self):
|
||||
processor_loaded = self.processor_class.from_pretrained(self.tmpdirname)
|
||||
processor_dict_loaded = json.loads(processor_loaded.to_json_string())
|
||||
@@ -50,7 +50,7 @@ def floats_list(shape, scale=1.0, rng=None, name=None):
|
||||
@require_torch
|
||||
@require_sentencepiece
|
||||
@require_torchaudio
|
||||
# Copied from tests.models.musicgen.test_processor_musicgen.MusicgenProcessorTest with Musicgen->MusicgenMelody, Encodec->MusicgenMelody, padding_mask->attention_mask, input_values->input_features
|
||||
# Copied from tests.models.musicgen.test_processing_musicgen.MusicgenProcessorTest with Musicgen->MusicgenMelody, Encodec->MusicgenMelody, padding_mask->attention_mask, input_values->input_features
|
||||
class MusicgenMelodyProcessorTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# Ignore copy
|
||||
@@ -48,7 +48,7 @@ class PaliGemmaProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
def tearDownClass(cls):
|
||||
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -65,7 +65,7 @@ class Qwen2_5_VLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
def tearDownClass(cls):
|
||||
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -68,7 +68,7 @@ class Qwen2VLProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
def tearDownClass(cls):
|
||||
shutil.rmtree(cls.tmpdirname, ignore_errors=True)
|
||||
|
||||
# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
# Copied from tests.models.llava.test_processing_llava.LlavaProcessorTest.test_get_num_vision_tokens
|
||||
def test_get_num_vision_tokens(self):
|
||||
"Tests general functionality of the helper used internally in vLLM"
|
||||
|
||||
@@ -76,7 +76,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
|
||||
tokenizer_instance = isinstance(processor.tokenizer, (SeamlessM4TTokenizerFast, SeamlessM4TTokenizer))
|
||||
self.assertTrue(tokenizer_instance)
|
||||
|
||||
# Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->SeamlessM4T
|
||||
# Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_feature_extractor with Whisper->SeamlessM4T
|
||||
def test_feature_extractor(self):
|
||||
feature_extractor = self.get_feature_extractor()
|
||||
tokenizer = self.get_tokenizer()
|
||||
@@ -91,7 +91,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
|
||||
for key in input_feat_extract:
|
||||
self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)
|
||||
|
||||
# Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer with Whisper->SeamlessM4T
|
||||
# Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer with Whisper->SeamlessM4T
|
||||
def test_tokenizer(self):
|
||||
feature_extractor = self.get_feature_extractor()
|
||||
tokenizer = self.get_tokenizer()
|
||||
@@ -107,7 +107,7 @@ class SeamlessM4TProcessorTest(unittest.TestCase):
|
||||
for key in encoded_tok:
|
||||
self.assertListEqual(encoded_tok[key], encoded_processor[key])
|
||||
|
||||
# Copied from test.models.whisper.test_processor_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->SeamlessM4T
|
||||
# Copied from test.models.whisper.test_processing_whisper.WhisperProcessorTest.test_tokenizer_decode with Whisper->SeamlessM4T
|
||||
def test_tokenizer_decode(self):
|
||||
feature_extractor = self.get_feature_extractor()
|
||||
tokenizer = self.get_tokenizer()
|
||||
@@ -297,7 +297,7 @@ class SmolVLMProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
self.assertEqual(np.array(inputs["pixel_values"]).shape, (2, 2, 3, 512, 512))
|
||||
self.assertEqual(np.array(inputs["pixel_attention_mask"]).shape, (2, 2, 512, 512))
|
||||
|
||||
# Copied from tests.models.idefics2.test_processor_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
|
||||
# Copied from tests.models.idefics2.test_processing_idefics2.Idefics2ProcessorTest.test_process_interleaved_images_prompts_image_error
|
||||
def test_process_interleaved_images_prompts_image_error(self):
|
||||
processor = self.get_processor()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -17,12 +17,12 @@ import glob
|
||||
import importlib
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import Counter, defaultdict, deque
|
||||
from typing import Optional, Union
|
||||
|
||||
import libcst as cst
|
||||
from check_copies import run_ruff
|
||||
from create_dependency_mapping import find_priority_list
|
||||
from libcst import ClassDef, CSTVisitor
|
||||
from libcst import matchers as m
|
||||
@@ -1676,6 +1676,16 @@ def create_modules(modular_mapper: ModularFileMapper) -> dict[str, cst.Module]:
|
||||
return files
|
||||
|
||||
|
||||
def run_ruff(code, check=False):
|
||||
if check:
|
||||
command = ["ruff", "check", "-", "--fix", "--exit-zero"]
|
||||
else:
|
||||
command = ["ruff", "format", "-", "--config", "pyproject.toml", "--silent"]
|
||||
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
|
||||
stdout, _ = process.communicate(input=code.encode())
|
||||
return stdout.decode()
|
||||
|
||||
|
||||
def convert_modular_file(modular_file):
|
||||
pattern = re.search(r"modular_(.*)(?=\.py$)", modular_file)
|
||||
output = {}
|
||||
|
||||
Reference in New Issue
Block a user