From bc3253f07678538188185f179cf332be702ce6d5 Mon Sep 17 00:00:00 2001 From: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com> Date: Wed, 12 Mar 2025 18:39:25 -0400 Subject: [PATCH] Remove hardcoded slow image processor class in processors supporting fast ones (#36266) * Add fast image processor class to processors supporting them * fix test kosmos2 --- .../models/altclip/processing_altclip.py | 2 +- .../models/auto/image_processing_auto.py | 4 ++-- src/transformers/models/blip/processing_blip.py | 2 +- src/transformers/models/blip_2/processing_blip_2.py | 2 +- src/transformers/models/clip/processing_clip.py | 2 +- .../models/clipseg/processing_clipseg.py | 2 +- .../models/colpali/processing_colpali.py | 2 +- .../models/instructblip/processing_instructblip.py | 2 +- .../models/kosmos2/processing_kosmos2.py | 2 +- .../llava_next_video/processing_llava_next_video.py | 2 +- .../models/mgp_str/processing_mgp_str.py | 2 +- .../models/omdet_turbo/processing_omdet_turbo.py | 2 +- .../models/paligemma/processing_paligemma.py | 2 +- src/transformers/models/siglip/processing_siglip.py | 2 +- src/transformers/processing_utils.py | 12 +++++++++++- tests/models/kosmos2/test_processor_kosmos2.py | 7 +++++-- 16 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/transformers/models/altclip/processing_altclip.py b/src/transformers/models/altclip/processing_altclip.py index 153ecc2e2b..1198cf2afd 100644 --- a/src/transformers/models/altclip/processing_altclip.py +++ b/src/transformers/models/altclip/processing_altclip.py @@ -44,7 +44,7 @@ class AltCLIPProcessor(ProcessorMixin): """ attributes = ["image_processor", "tokenizer"] - image_processor_class = "CLIPImageProcessor" + image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast") tokenizer_class = ("XLMRobertaTokenizer", "XLMRobertaTokenizerFast") @deprecate_kwarg(old_name="feature_extractor", version="5.0.0", new_name="image_processor") diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index 40c45fa94b..336d3bf116 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -490,7 +490,7 @@ class AutoImageProcessor: image_processor_auto_map = config.auto_map["AutoImageProcessor"] image_processor_class = None - # TODO: @yoni, change logic in v4.48 (when use_fast set to True by default) + # TODO: @yoni, change logic in v4.50 (when use_fast set to True by default) if image_processor_type is not None: # if use_fast is not set and the processor was saved with a fast processor, we use it, otherwise we use the slow processor. if use_fast is None: @@ -498,7 +498,7 @@ class AutoImageProcessor: if not use_fast: logger.warning_once( "Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. " - "`use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. " + "`use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. " "This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`." ) # Update class name to reflect the use_fast option. If class is not found, we fall back to the slow version. diff --git a/src/transformers/models/blip/processing_blip.py b/src/transformers/models/blip/processing_blip.py index edef863e40..c65ff6b66f 100644 --- a/src/transformers/models/blip/processing_blip.py +++ b/src/transformers/models/blip/processing_blip.py @@ -56,7 +56,7 @@ class BlipProcessor(ProcessorMixin): attributes = ["image_processor", "tokenizer"] valid_kwargs = [] - image_processor_class = "BlipImageProcessor" + image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast") tokenizer_class = ("BertTokenizer", "BertTokenizerFast") def __init__(self, image_processor, tokenizer, **kwargs): diff --git a/src/transformers/models/blip_2/processing_blip_2.py b/src/transformers/models/blip_2/processing_blip_2.py index 5d09ea7c07..36b663dccb 100644 --- a/src/transformers/models/blip_2/processing_blip_2.py +++ b/src/transformers/models/blip_2/processing_blip_2.py @@ -68,7 +68,7 @@ class Blip2Processor(ProcessorMixin): attributes = ["image_processor", "tokenizer"] valid_kwargs = ["num_query_tokens"] - image_processor_class = "BlipImageProcessor" + image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast") tokenizer_class = "AutoTokenizer" def __init__(self, image_processor, tokenizer, num_query_tokens=None, **kwargs): diff --git a/src/transformers/models/clip/processing_clip.py b/src/transformers/models/clip/processing_clip.py index e69e65dec6..0218d2af6a 100644 --- a/src/transformers/models/clip/processing_clip.py +++ b/src/transformers/models/clip/processing_clip.py @@ -37,7 +37,7 @@ class CLIPProcessor(ProcessorMixin): """ attributes = ["image_processor", "tokenizer"] - image_processor_class = "CLIPImageProcessor" + image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast") tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast") def __init__(self, image_processor=None, tokenizer=None, **kwargs): diff --git a/src/transformers/models/clipseg/processing_clipseg.py b/src/transformers/models/clipseg/processing_clipseg.py index bd817ae786..fd7fe7c094 100644 --- a/src/transformers/models/clipseg/processing_clipseg.py +++ b/src/transformers/models/clipseg/processing_clipseg.py @@ -37,7 +37,7 @@ class CLIPSegProcessor(ProcessorMixin): """ attributes = ["image_processor", "tokenizer"] - image_processor_class = "ViTImageProcessor" + image_processor_class = ("ViTImageProcessor", "ViTImageProcessorFast") tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast") def __init__(self, image_processor=None, tokenizer=None, **kwargs): diff --git a/src/transformers/models/colpali/processing_colpali.py b/src/transformers/models/colpali/processing_colpali.py index 342cd0cd3d..be3ec4e035 100644 --- a/src/transformers/models/colpali/processing_colpali.py +++ b/src/transformers/models/colpali/processing_colpali.py @@ -91,7 +91,7 @@ class ColPaliProcessor(ProcessorMixin): attributes = ["image_processor", "tokenizer"] valid_kwargs = ["chat_template"] - image_processor_class = "SiglipImageProcessor" + image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast") tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast") visual_prompt_prefix: ClassVar[str] = "Describe the image." diff --git a/src/transformers/models/instructblip/processing_instructblip.py b/src/transformers/models/instructblip/processing_instructblip.py index 9a46b97875..408dfbd075 100644 --- a/src/transformers/models/instructblip/processing_instructblip.py +++ b/src/transformers/models/instructblip/processing_instructblip.py @@ -73,7 +73,7 @@ class InstructBlipProcessor(ProcessorMixin): attributes = ["image_processor", "tokenizer", "qformer_tokenizer"] valid_kwargs = ["num_query_tokens"] - image_processor_class = "BlipImageProcessor" + image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast") tokenizer_class = "AutoTokenizer" qformer_tokenizer_class = "AutoTokenizer" diff --git a/src/transformers/models/kosmos2/processing_kosmos2.py b/src/transformers/models/kosmos2/processing_kosmos2.py index 3d1d28622f..73a3f66f9b 100644 --- a/src/transformers/models/kosmos2/processing_kosmos2.py +++ b/src/transformers/models/kosmos2/processing_kosmos2.py @@ -85,7 +85,7 @@ class Kosmos2Processor(ProcessorMixin): attributes = ["image_processor", "tokenizer"] valid_kwargs = ["num_patch_index_tokens"] - image_processor_class = "CLIPImageProcessor" + image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast") tokenizer_class = "AutoTokenizer" def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024, *kwargs): diff --git a/src/transformers/models/llava_next_video/processing_llava_next_video.py b/src/transformers/models/llava_next_video/processing_llava_next_video.py index 6ec40209df..c6a0f94c06 100644 --- a/src/transformers/models/llava_next_video/processing_llava_next_video.py +++ b/src/transformers/models/llava_next_video/processing_llava_next_video.py @@ -85,7 +85,7 @@ class LlavaNextVideoProcessor(ProcessorMixin): "video_token", "num_additional_image_tokens", ] - image_processor_class = "LlavaNextImageProcessor" + image_processor_class = ("LlavaNextImageProcessor", "LlavaNextImageProcessorFast") video_processor_class = "LlavaNextVideoImageProcessor" tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast") diff --git a/src/transformers/models/mgp_str/processing_mgp_str.py b/src/transformers/models/mgp_str/processing_mgp_str.py index 81d49cb10c..66ea06fc18 100644 --- a/src/transformers/models/mgp_str/processing_mgp_str.py +++ b/src/transformers/models/mgp_str/processing_mgp_str.py @@ -51,7 +51,7 @@ class MgpstrProcessor(ProcessorMixin): """ attributes = ["image_processor", "char_tokenizer"] - image_processor_class = "ViTImageProcessor" + image_processor_class = ("ViTImageProcessor", "ViTImageProcessorFast") char_tokenizer_class = "MgpstrTokenizer" def __init__(self, image_processor=None, tokenizer=None, **kwargs): diff --git a/src/transformers/models/omdet_turbo/processing_omdet_turbo.py b/src/transformers/models/omdet_turbo/processing_omdet_turbo.py index f52840e1d0..6d59202e57 100644 --- a/src/transformers/models/omdet_turbo/processing_omdet_turbo.py +++ b/src/transformers/models/omdet_turbo/processing_omdet_turbo.py @@ -216,7 +216,7 @@ class OmDetTurboProcessor(ProcessorMixin): """ attributes = ["image_processor", "tokenizer"] - image_processor_class = "DetrImageProcessor" + image_processor_class = ("DetrImageProcessor", "DetrImageProcessorFast") tokenizer_class = "AutoTokenizer" def __init__(self, image_processor, tokenizer): diff --git a/src/transformers/models/paligemma/processing_paligemma.py b/src/transformers/models/paligemma/processing_paligemma.py index 9419275da6..91deeb3f4f 100644 --- a/src/transformers/models/paligemma/processing_paligemma.py +++ b/src/transformers/models/paligemma/processing_paligemma.py @@ -117,7 +117,7 @@ class PaliGemmaProcessor(ProcessorMixin): attributes = ["image_processor", "tokenizer"] valid_kwargs = ["chat_template"] - image_processor_class = "SiglipImageProcessor" + image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast") tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast") def __init__( diff --git a/src/transformers/models/siglip/processing_siglip.py b/src/transformers/models/siglip/processing_siglip.py index 7a37cebabf..21597cb3c6 100644 --- a/src/transformers/models/siglip/processing_siglip.py +++ b/src/transformers/models/siglip/processing_siglip.py @@ -40,7 +40,7 @@ class SiglipProcessor(ProcessorMixin): """ attributes = ["image_processor", "tokenizer"] - image_processor_class = "SiglipImageProcessor" + image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast") tokenizer_class = "AutoTokenizer" def __init__(self, image_processor, tokenizer): diff --git a/src/transformers/processing_utils.py b/src/transformers/processing_utils.py index 614cbe8d76..e709878f1c 100644 --- a/src/transformers/processing_utils.py +++ b/src/transformers/processing_utils.py @@ -1105,7 +1105,17 @@ class ProcessorMixin(PushToHubMixin): class_name = getattr(cls, f"{attribute_name}_class") if isinstance(class_name, tuple): classes = tuple(getattr(transformers_module, n) if n is not None else None for n in class_name) - use_fast = kwargs.get("use_fast", True) + if attribute_name == "image_processor": + # TODO: @yoni, change logic in v4.50 (when use_fast set to True by default) + use_fast = kwargs.get("use_fast", None) + if use_fast is None: + logger.warning_once( + "Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. " + "`use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. " + "This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`." + ) + else: + use_fast = kwargs.get("use_fast", True) if use_fast and classes[1] is not None: attribute_class = classes[1] else: diff --git a/tests/models/kosmos2/test_processor_kosmos2.py b/tests/models/kosmos2/test_processor_kosmos2.py index 8874c7d1d3..0a34c39681 100644 --- a/tests/models/kosmos2/test_processor_kosmos2.py +++ b/tests/models/kosmos2/test_processor_kosmos2.py @@ -70,12 +70,15 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase): processor = Kosmos2Processor(image_processor, fast_tokenizer) processor.save_pretrained(self.tmpdirname) - # We override this method to take the fast tokenizer or image processor by default + # We override this method to take the fast tokenizer by default def get_component(self, attribute, **kwargs): assert attribute in self.processor_class.attributes component_class_name = getattr(self.processor_class, f"{attribute}_class") if isinstance(component_class_name, tuple): - component_class_name = component_class_name[-1] + if attribute == "image_processor": + component_class_name = component_class_name[0] + else: + component_class_name = component_class_name[-1] component_class = processor_class_from_name(component_class_name) component = component_class.from_pretrained(self.tmpdirname, **kwargs) # noqa