Remove hardcoded slow image processor class in processors supporting fast ones (#36266)
* Add fast image processor class to processors supporting them
* Fix Kosmos2 test
This commit is contained in:
@@ -44,7 +44,7 @@ class AltCLIPProcessor(ProcessorMixin):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
attributes = ["image_processor", "tokenizer"]
|
attributes = ["image_processor", "tokenizer"]
|
||||||
image_processor_class = "CLIPImageProcessor"
|
image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
|
||||||
tokenizer_class = ("XLMRobertaTokenizer", "XLMRobertaTokenizerFast")
|
tokenizer_class = ("XLMRobertaTokenizer", "XLMRobertaTokenizerFast")
|
||||||
|
|
||||||
@deprecate_kwarg(old_name="feature_extractor", version="5.0.0", new_name="image_processor")
|
@deprecate_kwarg(old_name="feature_extractor", version="5.0.0", new_name="image_processor")
|
||||||
|
|||||||
@@ -490,7 +490,7 @@ class AutoImageProcessor:
|
|||||||
image_processor_auto_map = config.auto_map["AutoImageProcessor"]
|
image_processor_auto_map = config.auto_map["AutoImageProcessor"]
|
||||||
|
|
||||||
image_processor_class = None
|
image_processor_class = None
|
||||||
# TODO: @yoni, change logic in v4.48 (when use_fast set to True by default)
|
# TODO: @yoni, change logic in v4.50 (when use_fast set to True by default)
|
||||||
if image_processor_type is not None:
|
if image_processor_type is not None:
|
||||||
# if use_fast is not set and the processor was saved with a fast processor, we use it, otherwise we use the slow processor.
|
# if use_fast is not set and the processor was saved with a fast processor, we use it, otherwise we use the slow processor.
|
||||||
if use_fast is None:
|
if use_fast is None:
|
||||||
@@ -498,7 +498,7 @@ class AutoImageProcessor:
|
|||||||
if not use_fast:
|
if not use_fast:
|
||||||
logger.warning_once(
|
logger.warning_once(
|
||||||
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. "
|
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. "
|
||||||
"`use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. "
|
"`use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. "
|
||||||
"This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`."
|
"This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`."
|
||||||
)
|
)
|
||||||
# Update class name to reflect the use_fast option. If class is not found, we fall back to the slow version.
|
# Update class name to reflect the use_fast option. If class is not found, we fall back to the slow version.
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ class BlipProcessor(ProcessorMixin):
|
|||||||
|
|
||||||
attributes = ["image_processor", "tokenizer"]
|
attributes = ["image_processor", "tokenizer"]
|
||||||
valid_kwargs = []
|
valid_kwargs = []
|
||||||
image_processor_class = "BlipImageProcessor"
|
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
|
||||||
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
|
tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
|
||||||
|
|
||||||
def __init__(self, image_processor, tokenizer, **kwargs):
|
def __init__(self, image_processor, tokenizer, **kwargs):
|
||||||
|
|||||||
@@ -68,7 +68,7 @@ class Blip2Processor(ProcessorMixin):
|
|||||||
|
|
||||||
attributes = ["image_processor", "tokenizer"]
|
attributes = ["image_processor", "tokenizer"]
|
||||||
valid_kwargs = ["num_query_tokens"]
|
valid_kwargs = ["num_query_tokens"]
|
||||||
image_processor_class = "BlipImageProcessor"
|
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
|
||||||
tokenizer_class = "AutoTokenizer"
|
tokenizer_class = "AutoTokenizer"
|
||||||
|
|
||||||
def __init__(self, image_processor, tokenizer, num_query_tokens=None, **kwargs):
|
def __init__(self, image_processor, tokenizer, num_query_tokens=None, **kwargs):
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ class CLIPProcessor(ProcessorMixin):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
attributes = ["image_processor", "tokenizer"]
|
attributes = ["image_processor", "tokenizer"]
|
||||||
image_processor_class = "CLIPImageProcessor"
|
image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
|
||||||
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
|
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
|
||||||
|
|
||||||
def __init__(self, image_processor=None, tokenizer=None, **kwargs):
|
def __init__(self, image_processor=None, tokenizer=None, **kwargs):
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ class CLIPSegProcessor(ProcessorMixin):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
attributes = ["image_processor", "tokenizer"]
|
attributes = ["image_processor", "tokenizer"]
|
||||||
image_processor_class = "ViTImageProcessor"
|
image_processor_class = ("ViTImageProcessor", "ViTImageProcessorFast")
|
||||||
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
|
tokenizer_class = ("CLIPTokenizer", "CLIPTokenizerFast")
|
||||||
|
|
||||||
def __init__(self, image_processor=None, tokenizer=None, **kwargs):
|
def __init__(self, image_processor=None, tokenizer=None, **kwargs):
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ class ColPaliProcessor(ProcessorMixin):
|
|||||||
|
|
||||||
attributes = ["image_processor", "tokenizer"]
|
attributes = ["image_processor", "tokenizer"]
|
||||||
valid_kwargs = ["chat_template"]
|
valid_kwargs = ["chat_template"]
|
||||||
image_processor_class = "SiglipImageProcessor"
|
image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast")
|
||||||
tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast")
|
tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast")
|
||||||
|
|
||||||
visual_prompt_prefix: ClassVar[str] = "Describe the image."
|
visual_prompt_prefix: ClassVar[str] = "Describe the image."
|
||||||
|
|||||||
@@ -73,7 +73,7 @@ class InstructBlipProcessor(ProcessorMixin):
|
|||||||
|
|
||||||
attributes = ["image_processor", "tokenizer", "qformer_tokenizer"]
|
attributes = ["image_processor", "tokenizer", "qformer_tokenizer"]
|
||||||
valid_kwargs = ["num_query_tokens"]
|
valid_kwargs = ["num_query_tokens"]
|
||||||
image_processor_class = "BlipImageProcessor"
|
image_processor_class = ("BlipImageProcessor", "BlipImageProcessorFast")
|
||||||
tokenizer_class = "AutoTokenizer"
|
tokenizer_class = "AutoTokenizer"
|
||||||
qformer_tokenizer_class = "AutoTokenizer"
|
qformer_tokenizer_class = "AutoTokenizer"
|
||||||
|
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ class Kosmos2Processor(ProcessorMixin):
|
|||||||
|
|
||||||
attributes = ["image_processor", "tokenizer"]
|
attributes = ["image_processor", "tokenizer"]
|
||||||
valid_kwargs = ["num_patch_index_tokens"]
|
valid_kwargs = ["num_patch_index_tokens"]
|
||||||
image_processor_class = "CLIPImageProcessor"
|
image_processor_class = ("CLIPImageProcessor", "CLIPImageProcessorFast")
|
||||||
tokenizer_class = "AutoTokenizer"
|
tokenizer_class = "AutoTokenizer"
|
||||||
|
|
||||||
def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024, *kwargs):
|
def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024, *kwargs):
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ class LlavaNextVideoProcessor(ProcessorMixin):
|
|||||||
"video_token",
|
"video_token",
|
||||||
"num_additional_image_tokens",
|
"num_additional_image_tokens",
|
||||||
]
|
]
|
||||||
image_processor_class = "LlavaNextImageProcessor"
|
image_processor_class = ("LlavaNextImageProcessor", "LlavaNextImageProcessorFast")
|
||||||
video_processor_class = "LlavaNextVideoImageProcessor"
|
video_processor_class = "LlavaNextVideoImageProcessor"
|
||||||
tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast")
|
tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast")
|
||||||
|
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ class MgpstrProcessor(ProcessorMixin):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
attributes = ["image_processor", "char_tokenizer"]
|
attributes = ["image_processor", "char_tokenizer"]
|
||||||
image_processor_class = "ViTImageProcessor"
|
image_processor_class = ("ViTImageProcessor", "ViTImageProcessorFast")
|
||||||
char_tokenizer_class = "MgpstrTokenizer"
|
char_tokenizer_class = "MgpstrTokenizer"
|
||||||
|
|
||||||
def __init__(self, image_processor=None, tokenizer=None, **kwargs):
|
def __init__(self, image_processor=None, tokenizer=None, **kwargs):
|
||||||
|
|||||||
@@ -216,7 +216,7 @@ class OmDetTurboProcessor(ProcessorMixin):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
attributes = ["image_processor", "tokenizer"]
|
attributes = ["image_processor", "tokenizer"]
|
||||||
image_processor_class = "DetrImageProcessor"
|
image_processor_class = ("DetrImageProcessor", "DetrImageProcessorFast")
|
||||||
tokenizer_class = "AutoTokenizer"
|
tokenizer_class = "AutoTokenizer"
|
||||||
|
|
||||||
def __init__(self, image_processor, tokenizer):
|
def __init__(self, image_processor, tokenizer):
|
||||||
|
|||||||
@@ -117,7 +117,7 @@ class PaliGemmaProcessor(ProcessorMixin):
|
|||||||
|
|
||||||
attributes = ["image_processor", "tokenizer"]
|
attributes = ["image_processor", "tokenizer"]
|
||||||
valid_kwargs = ["chat_template"]
|
valid_kwargs = ["chat_template"]
|
||||||
image_processor_class = "SiglipImageProcessor"
|
image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast")
|
||||||
tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast")
|
tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast")
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ class SiglipProcessor(ProcessorMixin):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
attributes = ["image_processor", "tokenizer"]
|
attributes = ["image_processor", "tokenizer"]
|
||||||
image_processor_class = "SiglipImageProcessor"
|
image_processor_class = ("SiglipImageProcessor", "SiglipImageProcessorFast")
|
||||||
tokenizer_class = "AutoTokenizer"
|
tokenizer_class = "AutoTokenizer"
|
||||||
|
|
||||||
def __init__(self, image_processor, tokenizer):
|
def __init__(self, image_processor, tokenizer):
|
||||||
|
|||||||
@@ -1105,6 +1105,16 @@ class ProcessorMixin(PushToHubMixin):
|
|||||||
class_name = getattr(cls, f"{attribute_name}_class")
|
class_name = getattr(cls, f"{attribute_name}_class")
|
||||||
if isinstance(class_name, tuple):
|
if isinstance(class_name, tuple):
|
||||||
classes = tuple(getattr(transformers_module, n) if n is not None else None for n in class_name)
|
classes = tuple(getattr(transformers_module, n) if n is not None else None for n in class_name)
|
||||||
|
if attribute_name == "image_processor":
|
||||||
|
# TODO: @yoni, change logic in v4.50 (when use_fast set to True by default)
|
||||||
|
use_fast = kwargs.get("use_fast", None)
|
||||||
|
if use_fast is None:
|
||||||
|
logger.warning_once(
|
||||||
|
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. "
|
||||||
|
"`use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. "
|
||||||
|
"This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`."
|
||||||
|
)
|
||||||
|
else:
|
||||||
use_fast = kwargs.get("use_fast", True)
|
use_fast = kwargs.get("use_fast", True)
|
||||||
if use_fast and classes[1] is not None:
|
if use_fast and classes[1] is not None:
|
||||||
attribute_class = classes[1]
|
attribute_class = classes[1]
|
||||||
|
|||||||
@@ -70,11 +70,14 @@ class Kosmos2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
|
|||||||
processor = Kosmos2Processor(image_processor, fast_tokenizer)
|
processor = Kosmos2Processor(image_processor, fast_tokenizer)
|
||||||
processor.save_pretrained(self.tmpdirname)
|
processor.save_pretrained(self.tmpdirname)
|
||||||
|
|
||||||
# We override this method to take the fast tokenizer or image processor by default
|
# We override this method to take the fast tokenizer by default
|
||||||
def get_component(self, attribute, **kwargs):
|
def get_component(self, attribute, **kwargs):
|
||||||
assert attribute in self.processor_class.attributes
|
assert attribute in self.processor_class.attributes
|
||||||
component_class_name = getattr(self.processor_class, f"{attribute}_class")
|
component_class_name = getattr(self.processor_class, f"{attribute}_class")
|
||||||
if isinstance(component_class_name, tuple):
|
if isinstance(component_class_name, tuple):
|
||||||
|
if attribute == "image_processor":
|
||||||
|
component_class_name = component_class_name[0]
|
||||||
|
else:
|
||||||
component_class_name = component_class_name[-1]
|
component_class_name = component_class_name[-1]
|
||||||
|
|
||||||
component_class = processor_class_from_name(component_class_name)
|
component_class = processor_class_from_name(component_class_name)
|
||||||
|
|||||||
Reference in New Issue
Block a user