Remove hardcoded slow image processor class in processors supporting fast ones (#36266)

* Add fast image processor classes to the processors that support them

* Fix the Kosmos2 processor test
This commit is contained in:
Yoni Gozlan
2025-03-12 18:39:25 -04:00
committed by GitHub
parent 0013ba61e5
commit bc3253f076
16 changed files with 31 additions and 18 deletions

View File

@@ -44,7 +44,7 @@ class AltCLIPProcessor(ProcessorMixin):
""" """
attributes = ["image_processor", "tokenizer"] attributes = ["image_processor", "tokenizer"]
image_processor_class = "CLIPImageProcessor" image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
tokenizer_class = ("XLMRobertaTokenizer", "XLMRobertaTokenizerFast") tokenizer_class = ("XLMRobertaTokenizer", "XLMRobertaTokenizerFast")
@deprecate_kwarg(old_name="feature_extractor", version="5.0.0", new_name="image_processor") @deprecate_kwarg(old_name="feature_extractor", version="5.0.0", new_name="image_processor")

View File

@@ -490,7 +490,7 @@ class AutoImageProcessor:
image_processor_auto_map = config.auto_map["AutoImageProcessor"] image_processor_auto_map = config.auto_map["AutoImageProcessor"]
image_processor_class = None image_processor_class = None
# TODO: @yoni, change logic in v4.48 (when use_fast set to True by default) # TODO: @yoni, change logic in v4.50 (when use_fast set to True by default)
if image_processor_type is not None: if image_processor_type is not None:
# if use_fast is not set and the processor was saved with a fast processor, we use it, otherwise we use the slow processor. # if use_fast is not set and the processor was saved with a fast processor, we use it, otherwise we use the slow processor.
if use_fast is None: if use_fast is None:
@@ -498,7 +498,7 @@ class AutoImageProcessor:
if not use_fast: if not use_fast:
logger.warning_once( logger.warning_once(
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. " "Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. "
"`use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. " "`use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. "
"This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`." "This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`."
) )
# Update class name to reflect the use_fast option. If class is not found, we fall back to the slow version. # Update class name to reflect the use_fast option. If class is not found, we fall back to the slow version.

View File

@@ -56,7 +56,7 @@ class BlipProcessor(ProcessorMixin):
attributes = ["image_processor", "tokenizer"] attributes = ["image_processor", "tokenizer"]
valid_kwargs = [] valid_kwargs = []
image_processor_class = "BlipImageProcessor" image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
tokenizer_class = ("BertTokenizer", "BertTokenizerFast") tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
def __init__(self, image_processor, tokenizer, **kwargs): def __init__(self, image_processor, tokenizer, **kwargs):

View File

@@ -68,7 +68,7 @@ class Blip2Processor(ProcessorMixin):
attributes = ["image_processor", "tokenizer"] attributes = ["image_processor", "tokenizer"]
valid_kwargs = ["num_query_tokens"] valid_kwargs = ["num_query_tokens"]
image_processor_class = "BlipImageProcessor" image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
tokenizer_class = "AutoTokenizer" tokenizer_class = "AutoTokenizer"
def __init__(self, image_processor, tokenizer, num_query_tokens=None, **kwargs): def __init__(self, image_processor, tokenizer, num_query_tokens=None, **kwargs):

View File

@@ -37,7 +37,7 @@ class CLIPProcessor(ProcessorMixin):
""" """
attributes = ["image_processor", "tokenizer"] attributes = ["image_processor", "tokenizer"]
image_processor_class = "CLIPImageProcessor" image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast") tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
def __init__(self, image_processor=None, tokenizer=None, **kwargs): def __init__(self, image_processor=None, tokenizer=None, **kwargs):

View File

@@ -37,7 +37,7 @@ class CLIPSegProcessor(ProcessorMixin):
""" """
attributes = ["image_processor", "tokenizer"] attributes = ["image_processor", "tokenizer"]
image_processor_class = "ViTImageProcessor" image_processor_class = ("ViTImageProcessor", "ViTImageProcessorFast")
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast") tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
def __init__(self, image_processor=None, tokenizer=None, **kwargs): def __init__(self, image_processor=None, tokenizer=None, **kwargs):

View File

@@ -91,7 +91,7 @@ class ColPaliProcessor(ProcessorMixin):
attributes = ["image_processor", "tokenizer"] attributes = ["image_processor", "tokenizer"]
valid_kwargs = ["chat_template"] valid_kwargs = ["chat_template"]
image_processor_class = "SiglipImageProcessor" image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast")
tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast") tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast")
visual_prompt_prefix: ClassVar[str] = "Describe the image." visual_prompt_prefix: ClassVar[str] = "Describe the image."

View File

@@ -73,7 +73,7 @@ class InstructBlipProcessor(ProcessorMixin):
attributes = ["image_processor", "tokenizer", "qformer_tokenizer"] attributes = ["image_processor", "tokenizer", "qformer_tokenizer"]
valid_kwargs = ["num_query_tokens"] valid_kwargs = ["num_query_tokens"]
image_processor_class = "BlipImageProcessor" image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
tokenizer_class = "AutoTokenizer" tokenizer_class = "AutoTokenizer"
qformer_tokenizer_class = "AutoTokenizer" qformer_tokenizer_class = "AutoTokenizer"

View File

@@ -85,7 +85,7 @@ class Kosmos2Processor(ProcessorMixin):
attributes = ["image_processor", "tokenizer"] attributes = ["image_processor", "tokenizer"]
valid_kwargs = ["num_patch_index_tokens"] valid_kwargs = ["num_patch_index_tokens"]
image_processor_class = "CLIPImageProcessor" image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
tokenizer_class = "AutoTokenizer" tokenizer_class = "AutoTokenizer"
def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024, *kwargs): def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024, *kwargs):

View File

@@ -85,7 +85,7 @@ class LlavaNextVideoProcessor(ProcessorMixin):
"video_token", "video_token",
"num_additional_image_tokens", "num_additional_image_tokens",
] ]
image_processor_class = "LlavaNextImageProcessor" image_processor_class = ("LlavaNextImageProcessor", "LlavaNextImageProcessorFast")
video_processor_class = "LlavaNextVideoImageProcessor" video_processor_class = "LlavaNextVideoImageProcessor"
tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast") tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast")

View File

@@ -51,7 +51,7 @@ class MgpstrProcessor(ProcessorMixin):
""" """
attributes = ["image_processor", "char_tokenizer"] attributes = ["image_processor", "char_tokenizer"]
image_processor_class = "ViTImageProcessor" image_processor_class = ("ViTImageProcessor", "ViTImageProcessorFast")
char_tokenizer_class = "MgpstrTokenizer" char_tokenizer_class = "MgpstrTokenizer"
def __init__(self, image_processor=None, tokenizer=None, **kwargs): def __init__(self, image_processor=None, tokenizer=None, **kwargs):

View File

@@ -216,7 +216,7 @@ class OmDetTurboProcessor(ProcessorMixin):
""" """
attributes = ["image_processor", "tokenizer"] attributes = ["image_processor", "tokenizer"]
image_processor_class = "DetrImageProcessor" image_processor_class = ("DetrImageProcessor", "DetrImageProcessorFast")
tokenizer_class = "AutoTokenizer" tokenizer_class = "AutoTokenizer"
def __init__(self, image_processor, tokenizer): def __init__(self, image_processor, tokenizer):

View File

@@ -117,7 +117,7 @@ class PaliGemmaProcessor(ProcessorMixin):
attributes = ["image_processor", "tokenizer"] attributes = ["image_processor", "tokenizer"]
valid_kwargs = ["chat_template"] valid_kwargs = ["chat_template"]
image_processor_class = "SiglipImageProcessor" image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast")
tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast") tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast")
def __init__( def __init__(

View File

@@ -40,7 +40,7 @@ class SiglipProcessor(ProcessorMixin):
""" """
attributes = ["image_processor", "tokenizer"] attributes = ["image_processor", "tokenizer"]
image_processor_class = "SiglipImageProcessor" image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast")
tokenizer_class = "AutoTokenizer" tokenizer_class = "AutoTokenizer"
def __init__(self, image_processor, tokenizer): def __init__(self, image_processor, tokenizer):

View File

@@ -1105,6 +1105,16 @@ class ProcessorMixin(PushToHubMixin):
class_name = getattr(cls, f"{attribute_name}_class") class_name = getattr(cls, f"{attribute_name}_class")
if isinstance(class_name, tuple): if isinstance(class_name, tuple):
classes = tuple(getattr(transformers_module, n) if n is not None else None for n in class_name) classes = tuple(getattr(transformers_module, n) if n is not None else None for n in class_name)
if attribute_name == "image_processor":
# TODO: @yoni, change logic in v4.50 (when use_fast set to True by default)
use_fast = kwargs.get("use_fast", None)
if use_fast is None:
logger.warning_once(
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. "
"`use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. "
"This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`."
)
else:
use_fast = kwargs.get("use_fast", True) use_fast = kwargs.get("use_fast", True)
if use_fast and classes[1] is not None: if use_fast and classes[1] is not None:
attribute_class = classes[1] attribute_class = classes[1]

View File

@@ -70,11 +70,14 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
processor = Kosmos2Processor(image_processor, fast_tokenizer) processor = Kosmos2Processor(image_processor, fast_tokenizer)
processor.save_pretrained(self.tmpdirname) processor.save_pretrained(self.tmpdirname)
# We override this method to take the fast tokenizer or image processor by default # We override this method to take the fast tokenizer by default
def get_component(self, attribute, **kwargs): def get_component(self, attribute, **kwargs):
assert attribute in self.processor_class.attributes assert attribute in self.processor_class.attributes
component_class_name = getattr(self.processor_class, f"{attribute}_class") component_class_name = getattr(self.processor_class, f"{attribute}_class")
if isinstance(component_class_name, tuple): if isinstance(component_class_name, tuple):
if attribute == "image_processor":
component_class_name = component_class_name[0]
else:
component_class_name = component_class_name[-1] component_class_name = component_class_name[-1]
component_class = processor_class_from_name(component_class_name) component_class = processor_class_from_name(component_class_name)