Remove hardcoded slow image processor class in processors supporting fast ones (#36266)
* Add fast image processor classes to the processors that support them
* Fix the Kosmos2 processor test
This commit is contained in:
@@ -44,7 +44,7 @@ class AltCLIPProcessor(ProcessorMixin):
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "CLIPImageProcessor"
|
||||
image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
|
||||
tokenizer_class = ("XLMRobertaTokenizer", "XLMRobertaTokenizerFast")
|
||||
|
||||
@deprecate_kwarg(old_name="feature_extractor", version="5.0.0", new_name="image_processor")
|
||||
|
||||
@@ -490,7 +490,7 @@ class AutoImageProcessor:
|
||||
image_processor_auto_map = config.auto_map["AutoImageProcessor"]
|
||||
|
||||
image_processor_class = None
|
||||
# TODO: @yoni, change logic in v4.48 (when use_fast set to True by default)
|
||||
# TODO: @yoni, change logic in v4.50 (when use_fast set to True by default)
|
||||
if image_processor_type is not None:
|
||||
# if use_fast is not set and the processor was saved with a fast processor, we use it, otherwise we use the slow processor.
|
||||
if use_fast is None:
|
||||
@@ -498,7 +498,7 @@ class AutoImageProcessor:
|
||||
if not use_fast:
|
||||
logger.warning_once(
|
||||
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. "
|
||||
"`use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. "
|
||||
"`use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. "
|
||||
"This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`."
|
||||
)
|
||||
# Update class name to reflect the use_fast option. If class is not found, we fall back to the slow version.
|
||||
|
||||
@@ -56,7 +56,7 @@ class BlipProcessor(ProcessorMixin):
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
valid_kwargs = []
|
||||
image_processor_class = "BlipImageProcessor"
|
||||
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
|
||||
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
|
||||
|
||||
def __init__(self, image_processor, tokenizer, **kwargs):
|
||||
|
||||
@@ -68,7 +68,7 @@ class Blip2Processor(ProcessorMixin):
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
valid_kwargs = ["num_query_tokens"]
|
||||
image_processor_class = "BlipImageProcessor"
|
||||
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
|
||||
tokenizer_class = "AutoTokenizer"
|
||||
|
||||
def __init__(self, image_processor, tokenizer, num_query_tokens=None, **kwargs):
|
||||
|
||||
@@ -37,7 +37,7 @@ class CLIPProcessor(ProcessorMixin):
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "CLIPImageProcessor"
|
||||
image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
|
||||
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
|
||||
|
||||
def __init__(self, image_processor=None, tokenizer=None, **kwargs):
|
||||
|
||||
@@ -37,7 +37,7 @@ class CLIPSegProcessor(ProcessorMixin):
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "ViTImageProcessor"
|
||||
image_processor_class = ("ViTImageProcessor", "ViTImageProcessorFast")
|
||||
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
|
||||
|
||||
def __init__(self, image_processor=None, tokenizer=None, **kwargs):
|
||||
|
||||
@@ -91,7 +91,7 @@ class ColPaliProcessor(ProcessorMixin):
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
valid_kwargs = ["chat_template"]
|
||||
image_processor_class = "SiglipImageProcessor"
|
||||
image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast")
|
||||
tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast")
|
||||
|
||||
visual_prompt_prefix: ClassVar[str] = "Describe the image."
|
||||
|
||||
@@ -73,7 +73,7 @@ class InstructBlipProcessor(ProcessorMixin):
|
||||
|
||||
attributes = ["image_processor", "tokenizer", "qformer_tokenizer"]
|
||||
valid_kwargs = ["num_query_tokens"]
|
||||
image_processor_class = "BlipImageProcessor"
|
||||
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
|
||||
tokenizer_class = "AutoTokenizer"
|
||||
qformer_tokenizer_class = "AutoTokenizer"
|
||||
|
||||
|
||||
@@ -85,7 +85,7 @@ class Kosmos2Processor(ProcessorMixin):
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
valid_kwargs = ["num_patch_index_tokens"]
|
||||
image_processor_class = "CLIPImageProcessor"
|
||||
image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
|
||||
tokenizer_class = "AutoTokenizer"
|
||||
|
||||
def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024, *kwargs):
|
||||
|
||||
@@ -85,7 +85,7 @@ class LlavaNextVideoProcessor(ProcessorMixin):
|
||||
"video_token",
|
||||
"num_additional_image_tokens",
|
||||
]
|
||||
image_processor_class = "LlavaNextImageProcessor"
|
||||
image_processor_class = ("LlavaNextImageProcessor", "LlavaNextImageProcessorFast")
|
||||
video_processor_class = "LlavaNextVideoImageProcessor"
|
||||
tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast")
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ class MgpstrProcessor(ProcessorMixin):
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "char_tokenizer"]
|
||||
image_processor_class = "ViTImageProcessor"
|
||||
image_processor_class = ("ViTImageProcessor", "ViTImageProcessorFast")
|
||||
char_tokenizer_class = "MgpstrTokenizer"
|
||||
|
||||
def __init__(self, image_processor=None, tokenizer=None, **kwargs):
|
||||
|
||||
@@ -216,7 +216,7 @@ class OmDetTurboProcessor(ProcessorMixin):
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "DetrImageProcessor"
|
||||
image_processor_class = ("DetrImageProcessor", "DetrImageProcessorFast")
|
||||
tokenizer_class = "AutoTokenizer"
|
||||
|
||||
def __init__(self, image_processor, tokenizer):
|
||||
|
||||
@@ -117,7 +117,7 @@ class PaliGemmaProcessor(ProcessorMixin):
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
valid_kwargs = ["chat_template"]
|
||||
image_processor_class = "SiglipImageProcessor"
|
||||
image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast")
|
||||
tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast")
|
||||
|
||||
def __init__(
|
||||
|
||||
@@ -40,7 +40,7 @@ class SiglipProcessor(ProcessorMixin):
|
||||
"""
|
||||
|
||||
attributes = ["image_processor", "tokenizer"]
|
||||
image_processor_class = "SiglipImageProcessor"
|
||||
image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast")
|
||||
tokenizer_class = "AutoTokenizer"
|
||||
|
||||
def __init__(self, image_processor, tokenizer):
|
||||
|
||||
@@ -1105,6 +1105,16 @@ class ProcessorMixin(PushToHubMixin):
|
||||
class_name = getattr(cls, f"{attribute_name}_class")
|
||||
if isinstance(class_name, tuple):
|
||||
classes = tuple(getattr(transformers_module, n) if n is not None else None for n in class_name)
|
||||
if attribute_name == "image_processor":
|
||||
# TODO: @yoni, change logic in v4.50 (when use_fast set to True by default)
|
||||
use_fast = kwargs.get("use_fast", None)
|
||||
if use_fast is None:
|
||||
logger.warning_once(
|
||||
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. "
|
||||
"`use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. "
|
||||
"This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`."
|
||||
)
|
||||
else:
|
||||
use_fast = kwargs.get("use_fast", True)
|
||||
if use_fast and classes[1] is not None:
|
||||
attribute_class = classes[1]
|
||||
|
||||
@@ -70,11 +70,14 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
||||
processor = Kosmos2Processor(image_processor, fast_tokenizer)
|
||||
processor.save_pretrained(self.tmpdirname)
|
||||
|
||||
# We override this method to take the fast tokenizer or image processor by default
|
||||
# We override this method to take the fast tokenizer by default
|
||||
def get_component(self, attribute, **kwargs):
|
||||
assert attribute in self.processor_class.attributes
|
||||
component_class_name = getattr(self.processor_class, f"{attribute}_class")
|
||||
if isinstance(component_class_name, tuple):
|
||||
if attribute == "image_processor":
|
||||
component_class_name = component_class_name[0]
|
||||
else:
|
||||
component_class_name = component_class_name[-1]
|
||||
|
||||
component_class = processor_class_from_name(component_class_name)
|
||||
|
||||
Reference in New Issue
Block a user