From 99e79054225c4547bb2870526a287320aef0bd32 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 25 Jan 2023 10:16:31 +0100 Subject: [PATCH] Supporting `ImageProcessor` in place of `FeatureExtractor` for pipelines (#20851) * Fixing the pipeline with image processor. * Update the slow test. * Using only the first image processor. * Include exclusion mechanism for Image processor. * Do not handle Gitconfig, deemed as a bug. * Apply suggestions from code review Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Remove `conversational` changes. They are not supposed to be here. * Address first row of comments. * Remove OneFormer modifications. Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> --- src/transformers/pipelines/__init__.py | 32 +++++++++++++ src/transformers/pipelines/base.py | 12 ++++- .../pipelines/image_segmentation.py | 20 +++++--- src/transformers/utils/generic.py | 13 +++-- .../test_pipelines_audio_classification.py | 2 +- ..._pipelines_automatic_speech_recognition.py | 2 +- tests/pipelines/test_pipelines_common.py | 47 +++++++++++++++---- .../test_pipelines_conversational.py | 2 +- .../test_pipelines_depth_estimation.py | 2 +- ...t_pipelines_document_question_answering.py | 2 +- .../test_pipelines_feature_extraction.py | 2 +- tests/pipelines/test_pipelines_fill_mask.py | 2 +- .../test_pipelines_image_classification.py | 2 +- .../test_pipelines_image_segmentation.py | 17 +++++-- .../pipelines/test_pipelines_image_to_text.py | 2 +- .../test_pipelines_object_detection.py | 2 +- .../test_pipelines_question_answering.py | 2 +- .../pipelines/test_pipelines_summarization.py | 2 +- .../test_pipelines_text2text_generation.py | 2 +- .../test_pipelines_text_classification.py | 2 +- .../test_pipelines_text_generation.py | 2 +- .../test_pipelines_token_classification.py | 2 +- tests/pipelines/test_pipelines_translation.py | 2 +- .../test_pipelines_video_classification.py | 2 +-
...est_pipelines_visual_question_answering.py | 2 +- tests/pipelines/test_pipelines_zero_shot.py | 2 +- ...ipelines_zero_shot_image_classification.py | 2 +- ...st_pipelines_zero_shot_object_detection.py | 2 +- 28 files changed, 138 insertions(+), 47 deletions(-) diff --git a/src/transformers/pipelines/__init__.py b/src/transformers/pipelines/__init__.py index fd231930f1..992f14f26d 100755 --- a/src/transformers/pipelines/__init__.py +++ b/src/transformers/pipelines/__init__.py @@ -31,8 +31,10 @@ from huggingface_hub import model_info from ..configuration_utils import PretrainedConfig from ..dynamic_module_utils import get_class_from_dynamic_module from ..feature_extraction_utils import PreTrainedFeatureExtractor +from ..image_processing_utils import BaseImageProcessor from ..models.auto.configuration_auto import AutoConfig from ..models.auto.feature_extraction_auto import FEATURE_EXTRACTOR_MAPPING, AutoFeatureExtractor +from ..models.auto.image_processing_auto import IMAGE_PROCESSOR_MAPPING, AutoImageProcessor from ..models.auto.modeling_auto import AutoModelForDepthEstimation from ..models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer from ..tokenization_utils import PreTrainedTokenizer @@ -374,6 +376,7 @@ SUPPORTED_TASKS = { } NO_FEATURE_EXTRACTOR_TASKS = set() +NO_IMAGE_PROCESSOR_TASKS = set() NO_TOKENIZER_TASKS = set() # Those model configs are special, they are generic over their task, meaning # any tokenizer/feature_extractor might be use for a given model so we cannot @@ -383,6 +386,7 @@ MULTI_MODEL_CONFIGS = {"SpeechEncoderDecoderConfig", "VisionEncoderDecoderConfig for task, values in SUPPORTED_TASKS.items(): if values["type"] == "text": NO_FEATURE_EXTRACTOR_TASKS.add(task) + NO_IMAGE_PROCESSOR_TASKS.add(task) elif values["type"] in {"audio", "image", "video"}: NO_TOKENIZER_TASKS.add(task) elif values["type"] != "multimodal": @@ -482,6 +486,7 @@ def pipeline( config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: 
Optional[Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, + image_processor: Optional[Union[str, BaseImageProcessor]] = None, framework: Optional[str] = None, revision: Optional[str] = None, use_fast: bool = True, @@ -766,6 +771,7 @@ def pipeline( load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None load_feature_extractor = type(model_config) in FEATURE_EXTRACTOR_MAPPING or feature_extractor is not None + load_image_processor = type(model_config) in IMAGE_PROCESSOR_MAPPING or image_processor is not None if ( tokenizer is None @@ -799,6 +805,8 @@ def pipeline( if task in NO_FEATURE_EXTRACTOR_TASKS: load_feature_extractor = False + if task in NO_IMAGE_PROCESSOR_TASKS: + load_image_processor = False if load_tokenizer: # Try to infer tokenizer from model or config name (if provided as str) @@ -829,6 +837,27 @@ def pipeline( tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs ) + if load_image_processor: + # Try to infer image processor from model or config name (if provided as str) + if image_processor is None: + if isinstance(model_name, str): + image_processor = model_name + elif isinstance(config, str): + image_processor = config + else: + # Impossible to guess what is the right image_processor here + raise Exception( + "Impossible to guess which image processor to use. " + "Please provide a PreTrainedImageProcessor class or a path/identifier " + "to a pretrained image processor." 
+ ) + + # Instantiate image_processor if needed + if isinstance(image_processor, (str, tuple)): + image_processor = AutoImageProcessor.from_pretrained( + image_processor, _from_pipeline=task, **hub_kwargs, **model_kwargs + ) + if load_feature_extractor: # Try to infer feature extractor from model or config name (if provided as str) if feature_extractor is None: @@ -897,6 +926,9 @@ def pipeline( if torch_dtype is not None: kwargs["torch_dtype"] = torch_dtype + if image_processor is not None: + kwargs["image_processor"] = image_processor + if device is not None: kwargs["device"] = device diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 28d6ee1937..3905d28d26 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -31,6 +31,7 @@ from packaging import version from ..dynamic_module_utils import custom_object_save from ..feature_extraction_utils import PreTrainedFeatureExtractor +from ..image_processing_utils import BaseImageProcessor from ..modelcard import ModelCard from ..models.auto.configuration_auto import AutoConfig from ..tokenization_utils import PreTrainedTokenizer @@ -743,6 +744,7 @@ class Pipeline(_ScikitCompat): model: Union["PreTrainedModel", "TFPreTrainedModel"], tokenizer: Optional[PreTrainedTokenizer] = None, feature_extractor: Optional[PreTrainedFeatureExtractor] = None, + image_processor: Optional[BaseImageProcessor] = None, modelcard: Optional[ModelCard] = None, framework: Optional[str] = None, task: str = "", @@ -759,6 +761,7 @@ class Pipeline(_ScikitCompat): self.model = model self.tokenizer = tokenizer self.feature_extractor = feature_extractor + self.image_processor = image_processor self.modelcard = modelcard self.framework = framework if is_torch_available() and self.framework == "pt": @@ -1012,7 +1015,9 @@ class Pipeline(_ScikitCompat): if "TOKENIZERS_PARALLELISM" not in os.environ: logger.info("Disabling tokenizer parallelism, we're using DataLoader multithreading 
already") os.environ["TOKENIZERS_PARALLELISM"] = "false" - collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor) + # TODO hack by collating feature_extractor and image_processor + feature_extractor = self.feature_extractor if self.feature_extractor is not None else self.image_processor + collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, feature_extractor) dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn) model_iterator = PipelineIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size) final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params) @@ -1121,7 +1126,10 @@ class ChunkPipeline(Pipeline): ) num_workers = 1 dataset = PipelineChunkIterator(inputs, self.preprocess, preprocess_params) - collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor) + + # TODO hack by collating feature_extractor and image_processor + feature_extractor = self.feature_extractor if self.feature_extractor is not None else self.image_processor + collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, feature_extractor) dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn) model_iterator = PipelinePackIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size) final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params) diff --git a/src/transformers/pipelines/image_segmentation.py b/src/transformers/pipelines/image_segmentation.py index 9fdb0dc331..5be5b858dc 100644 --- a/src/transformers/pipelines/image_segmentation.py +++ b/src/transformers/pipelines/image_segmentation.py @@ -67,6 +67,12 @@ class ImageSegmentationPipeline(Pipeline): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + if self.image_processor is 
None and self.feature_extractor is not None: + # Backward compatible change, if users called + # ImageSegmentationPipeline(.., feature_extractor=MyFeatureExtractor()) + # then we should keep working + self.image_processor = self.feature_extractor + if self.framework == "tf": raise ValueError(f"The {self.__class__} is only available in PyTorch.") @@ -137,7 +143,7 @@ class ImageSegmentationPipeline(Pipeline): def preprocess(self, image): image = load_image(image) target_size = [(image.height, image.width)] - inputs = self.feature_extractor(images=[image], return_tensors="pt") + inputs = self.image_processor(images=[image], return_tensors="pt") inputs["target_size"] = target_size return inputs @@ -152,10 +158,10 @@ class ImageSegmentationPipeline(Pipeline): ): fn = None - if subtask in {"panoptic", None} and hasattr(self.feature_extractor, "post_process_panoptic_segmentation"): - fn = self.feature_extractor.post_process_panoptic_segmentation - elif subtask in {"instance", None} and hasattr(self.feature_extractor, "post_process_instance_segmentation"): - fn = self.feature_extractor.post_process_instance_segmentation + if subtask in {"panoptic", None} and hasattr(self.image_processor, "post_process_panoptic_segmentation"): + fn = self.image_processor.post_process_panoptic_segmentation + elif subtask in {"instance", None} and hasattr(self.image_processor, "post_process_instance_segmentation"): + fn = self.image_processor.post_process_instance_segmentation if fn is not None: outputs = fn( @@ -176,8 +182,8 @@ class ImageSegmentationPipeline(Pipeline): score = segment["score"] annotation.append({"score": score, "label": label, "mask": mask}) - elif subtask in {"semantic", None} and hasattr(self.feature_extractor, "post_process_semantic_segmentation"): - outputs = self.feature_extractor.post_process_semantic_segmentation( + elif subtask in {"semantic", None} and hasattr(self.image_processor, "post_process_semantic_segmentation"): + outputs = 
self.image_processor.post_process_semantic_segmentation( model_outputs, target_sizes=model_outputs["target_size"] )[0] diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py index b601d1e61b..d138e0c1d4 100644 --- a/src/transformers/utils/generic.py +++ b/src/transformers/utils/generic.py @@ -29,9 +29,6 @@ import numpy as np from .import_utils import is_flax_available, is_tf_available, is_torch_available, is_torch_fx_proxy -if is_tf_available(): - import tensorflow as tf - if is_flax_available(): import jax.numpy as jnp @@ -437,6 +434,8 @@ def transpose(array, axes=None): elif is_torch_tensor(array): return array.T if axes is None else array.permute(*axes) elif is_tf_tensor(array): + import tensorflow as tf + return tf.transpose(array, perm=axes) elif is_jax_tensor(array): return jnp.transpose(array, axes=axes) @@ -454,6 +453,8 @@ def reshape(array, newshape): elif is_torch_tensor(array): return array.reshape(*newshape) elif is_tf_tensor(array): + import tensorflow as tf + return tf.reshape(array, newshape) elif is_jax_tensor(array): return jnp.reshape(array, newshape) @@ -471,6 +472,8 @@ def squeeze(array, axis=None): elif is_torch_tensor(array): return array.squeeze() if axis is None else array.squeeze(dim=axis) elif is_tf_tensor(array): + import tensorflow as tf + return tf.squeeze(array, axis=axis) elif is_jax_tensor(array): return jnp.squeeze(array, axis=axis) @@ -488,6 +491,8 @@ def expand_dims(array, axis): elif is_torch_tensor(array): return array.unsqueeze(dim=axis) elif is_tf_tensor(array): + import tensorflow as tf + return tf.expand_dims(array, axis=axis) elif is_jax_tensor(array): return jnp.expand_dims(array, axis=axis) @@ -504,6 +509,8 @@ def tensor_size(array): elif is_torch_tensor(array): return array.numel() elif is_tf_tensor(array): + import tensorflow as tf + return tf.size(array) elif is_jax_tensor(array): return array.size diff --git a/tests/pipelines/test_pipelines_audio_classification.py 
b/tests/pipelines/test_pipelines_audio_classification.py index 3f957132fd..2eccf8e6c3 100644 --- a/tests/pipelines/test_pipelines_audio_classification.py +++ b/tests/pipelines/test_pipelines_audio_classification.py @@ -27,7 +27,7 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=feature_extractor) # test with a raw waveform diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py index 3a5dcc7f43..a1204eb9f9 100644 --- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py +++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py @@ -61,7 +61,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel + (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else []) } - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): if tokenizer is None: # Side effect of no Fast Tokenizer class for these model, so skipping # But the slow tokenizer test should still run as they're quite small diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 8da9ed89a6..aad1645830 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -33,8 +33,10 @@ from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_ from requests.exceptions import HTTPError from transformers import ( FEATURE_EXTRACTOR_MAPPING, + IMAGE_PROCESSOR_MAPPING, TOKENIZER_MAPPING, 
AutoFeatureExtractor, + AutoImageProcessor, AutoModelForSequenceClassification, AutoTokenizer, DistilBertForSequenceClassification, @@ -154,8 +156,6 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_ feature_extractor = None except Exception: feature_extractor = None - if hasattr(tiny_config, "image_size") and feature_extractor: - feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size) # Audio Spectogram Transformer specific. if feature_extractor.__class__.__name__ == "ASTFeatureExtractor": @@ -168,9 +168,28 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_ feature_extractor = feature_extractor.__class__( feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel ) + # TODO remove this, once those have been moved to `image_processor`. + if hasattr(tiny_config, "image_size") and feature_extractor: + feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size) return feature_extractor +def get_tiny_image_processor_from_checkpoint(checkpoint, tiny_config, image_processor_class): + try: + image_processor = AutoImageProcessor.from_pretrained(checkpoint) + except Exception: + try: + if image_processor_class is not None: + image_processor = image_processor_class() + else: + image_processor = None + except Exception: + image_processor = None + if hasattr(tiny_config, "image_size") and image_processor: + image_processor = image_processor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size) + return image_processor + + class ANY: def __init__(self, *_types): self._types = _types @@ -184,7 +203,9 @@ class ANY: class PipelineTestCaseMeta(type): def __new__(mcs, name, bases, dct): - def gen_test(ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class): + def gen_test( + ModelClass, checkpoint, tiny_config, tokenizer_class, 
feature_extractor_class, image_processor_class + ): @skipIf( tiny_config is None, "TinyConfig does not exist, make sure that you defined a `_CONFIG_FOR_DOC` variable in the modeling" @@ -231,16 +252,21 @@ class PipelineTestCaseMeta(type): self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer") else: tokenizer = None + feature_extractor = get_tiny_feature_extractor_from_checkpoint( checkpoint, tiny_config, feature_extractor_class ) - if tokenizer is None and feature_extractor is None: + image_processor = get_tiny_image_processor_from_checkpoint( + checkpoint, tiny_config, image_processor_class + ) + + if tokenizer is None and feature_extractor is None and image_processor: self.skipTest( - f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor (PerceiverConfig with" - " no FastTokenizer ?)" + f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor or image_processor" + " (PerceiverConfig with no FastTokenizer ?)" ) - pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor) + pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor, image_processor) if pipeline is None: # The test can disable itself, but it should be very marginal # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist) @@ -283,6 +309,10 @@ class PipelineTestCaseMeta(type): feature_extractor_name = ( feature_extractor_class.__name__ if feature_extractor_class else "nofeature_extractor" ) + image_processor_class = IMAGE_PROCESSOR_MAPPING.get(configuration, None) + image_processor_name = ( + image_processor_class.__name__ if image_processor_class else "noimage_processor" + ) if not tokenizer_classes: # We need to test even if there are no tokenizers. 
tokenizer_classes = [None] @@ -300,7 +330,7 @@ class PipelineTestCaseMeta(type): else: tokenizer_name = "notokenizer" - test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}" + test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}_{image_processor_name}" if tokenizer_class is not None or feature_extractor_class is not None: dct[test_name] = gen_test( @@ -309,6 +339,7 @@ class PipelineTestCaseMeta(type): tiny_config, tokenizer_class, feature_extractor_class, + image_processor_class, ) @abstractmethod diff --git a/tests/pipelines/test_pipelines_conversational.py b/tests/pipelines/test_pipelines_conversational.py index 39ad2175dc..a3a2c3b694 100644 --- a/tests/pipelines/test_pipelines_conversational.py +++ b/tests/pipelines/test_pipelines_conversational.py @@ -53,7 +53,7 @@ class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM else [] ) - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer) return conversation_agent, [Conversation("Hi there!")] diff --git a/tests/pipelines/test_pipelines_depth_estimation.py b/tests/pipelines/test_pipelines_depth_estimation.py index d42ba2a067..593cd940a9 100644 --- a/tests/pipelines/test_pipelines_depth_estimation.py +++ b/tests/pipelines/test_pipelines_depth_estimation.py @@ -47,7 +47,7 @@ class DepthEstimationPipelineTests(unittest.TestCase, metaclass=PipelineTestCase model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): depth_estimator = DepthEstimationPipeline(model=model, feature_extractor=feature_extractor) return depth_estimator, [ 
"./tests/fixtures/tests_samples/COCO/000000039769.png", diff --git a/tests/pipelines/test_pipelines_document_question_answering.py b/tests/pipelines/test_pipelines_document_question_answering.py index c73decda0a..8ace9cfe2b 100644 --- a/tests/pipelines/test_pipelines_document_question_answering.py +++ b/tests/pipelines/test_pipelines_document_question_answering.py @@ -59,7 +59,7 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase, metaclass=Pipeli @require_pytesseract @require_vision - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): dqa_pipeline = pipeline( "document-question-answering", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor ) diff --git a/tests/pipelines/test_pipelines_feature_extraction.py b/tests/pipelines/test_pipelines_feature_extraction.py index 28cde51a8e..4aa58873e8 100644 --- a/tests/pipelines/test_pipelines_feature_extraction.py +++ b/tests/pipelines/test_pipelines_feature_extraction.py @@ -175,7 +175,7 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa raise ValueError("We expect lists of floats, nothing else") return shape - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): if tokenizer is None: self.skipTest("No tokenizer") return diff --git a/tests/pipelines/test_pipelines_fill_mask.py b/tests/pipelines/test_pipelines_fill_mask.py index 760c475524..a19902a61d 100644 --- a/tests/pipelines/test_pipelines_fill_mask.py +++ b/tests/pipelines/test_pipelines_fill_mask.py @@ -206,7 +206,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): unmasker.tokenizer.pad_token = None self.run_pipeline_test(unmasker, []) - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, 
image_processor): if tokenizer is None or tokenizer.mask_token_id is None: self.skipTest("The provided tokenizer has no mask token, (probably reformer or wav2vec2)") diff --git a/tests/pipelines/test_pipelines_image_classification.py b/tests/pipelines/test_pipelines_image_classification.py index 8c83888ba0..90612d21b7 100644 --- a/tests/pipelines/test_pipelines_image_classification.py +++ b/tests/pipelines/test_pipelines_image_classification.py @@ -49,7 +49,7 @@ class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor, top_k=2) examples = [ Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"), diff --git a/tests/pipelines/test_pipelines_image_segmentation.py b/tests/pipelines/test_pipelines_image_segmentation.py index 889a4ba041..8f022e68da 100644 --- a/tests/pipelines/test_pipelines_image_segmentation.py +++ b/tests/pipelines/test_pipelines_image_segmentation.py @@ -26,6 +26,7 @@ from transformers import ( MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING, MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING, AutoFeatureExtractor, + AutoImageProcessor, AutoModelForImageSegmentation, AutoModelForInstanceSegmentation, DetrForSegmentation, @@ -80,8 +81,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa + (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else []) } - def get_test_pipeline(self, model, tokenizer, feature_extractor): - image_segmenter = ImageSegmentationPipeline(model=model, feature_extractor=feature_extractor) + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): + 
image_segmenter = ImageSegmentationPipeline( + model=model, feature_extractor=feature_extractor, image_processor=image_processor + ) return image_segmenter, [ "./tests/fixtures/tests_samples/COCO/000000039769.png", "./tests/fixtures/tests_samples/COCO/000000039769.png", @@ -139,7 +142,11 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa "./tests/fixtures/tests_samples/COCO/000000039769.png", ] outputs = image_segmenter( - batch, threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0, batch_size=batch_size + batch, + threshold=0.0, + mask_threshold=0, + overlap_mask_area_threshold=0, + batch_size=batch_size, ) self.assertEqual(len(batch), len(outputs)) self.assertEqual(len(outputs[0]), n) @@ -188,10 +195,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic" model = AutoModelForImageSegmentation.from_pretrained(model_id) - feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) + image_processor = AutoImageProcessor.from_pretrained(model_id) image_segmenter = ImageSegmentationPipeline( model=model, - feature_extractor=feature_extractor, + image_processor=image_processor, subtask="panoptic", threshold=0.0, mask_threshold=0.0, diff --git a/tests/pipelines/test_pipelines_image_to_text.py b/tests/pipelines/test_pipelines_image_to_text.py index 0e1e805f9b..c6fdaa3102 100644 --- a/tests/pipelines/test_pipelines_image_to_text.py +++ b/tests/pipelines/test_pipelines_image_to_text.py @@ -36,7 +36,7 @@ class ImageToTextPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): pipe = pipeline("image-to-text", model=model, tokenizer=tokenizer, 
feature_extractor=feature_extractor) examples = [ Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"), diff --git a/tests/pipelines/test_pipelines_object_detection.py b/tests/pipelines/test_pipelines_object_detection.py index 043e73cb6e..caf9001ece 100644 --- a/tests/pipelines/test_pipelines_object_detection.py +++ b/tests/pipelines/test_pipelines_object_detection.py @@ -51,7 +51,7 @@ else: class ObjectDetectionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor) return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"] diff --git a/tests/pipelines/test_pipelines_question_answering.py b/tests/pipelines/test_pipelines_question_answering.py index 496b1685d9..84447ac230 100644 --- a/tests/pipelines/test_pipelines_question_answering.py +++ b/tests/pipelines/test_pipelines_question_answering.py @@ -31,7 +31,7 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): if isinstance(model.config, LxmertConfig): # This is an bimodal model, we need to find a more consistent way # to switch on those models. 
diff --git a/tests/pipelines/test_pipelines_summarization.py b/tests/pipelines/test_pipelines_summarization.py index 781716b5ba..aa8cd86fb8 100644 --- a/tests/pipelines/test_pipelines_summarization.py +++ b/tests/pipelines/test_pipelines_summarization.py @@ -34,7 +34,7 @@ class SummarizationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMe model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer) return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"] diff --git a/tests/pipelines/test_pipelines_text2text_generation.py b/tests/pipelines/test_pipelines_text2text_generation.py index 772190fb63..4fe9e6d150 100644 --- a/tests/pipelines/test_pipelines_text2text_generation.py +++ b/tests/pipelines/test_pipelines_text2text_generation.py @@ -34,7 +34,7 @@ class Text2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTest model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer) return generator, ["Something to write", "Something else"] diff --git a/tests/pipelines/test_pipelines_text_classification.py b/tests/pipelines/test_pipelines_text_classification.py index 80e8e2559f..849751b917 100644 --- a/tests/pipelines/test_pipelines_text_classification.py +++ b/tests/pipelines/test_pipelines_text_classification.py @@ -129,7 +129,7 @@ class TextClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestC outputs = text_classifier("Birds are a type of 
animal") self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}]) - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer) return text_classifier, ["HuggingFace is in", "This is another test"] diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py index 922a4e24b2..5dc3e5e34f 100644 --- a/tests/pipelines/test_pipelines_text_generation.py +++ b/tests/pipelines/test_pipelines_text_generation.py @@ -143,7 +143,7 @@ class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM ], ) - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer) return text_generator, ["This is a test", "Another test"] diff --git a/tests/pipelines/test_pipelines_token_classification.py b/tests/pipelines/test_pipelines_token_classification.py index 2e44448e13..1999be6494 100644 --- a/tests/pipelines/test_pipelines_token_classification.py +++ b/tests/pipelines/test_pipelines_token_classification.py @@ -37,7 +37,7 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer) return token_classifier, ["A simple string", "A simple string that is quite a bit longer"] diff --git a/tests/pipelines/test_pipelines_translation.py b/tests/pipelines/test_pipelines_translation.py index 
d8de606f69..3fc19a9064 100644 --- a/tests/pipelines/test_pipelines_translation.py +++ b/tests/pipelines/test_pipelines_translation.py @@ -34,7 +34,7 @@ class TranslationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): if isinstance(model.config, MBartConfig): src_lang, tgt_lang = list(tokenizer.lang_code_to_id.keys())[:2] translator = TranslationPipeline(model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang) diff --git a/tests/pipelines/test_pipelines_video_classification.py b/tests/pipelines/test_pipelines_video_classification.py index 25ddcfaf2d..2e5a777fd5 100644 --- a/tests/pipelines/test_pipelines_video_classification.py +++ b/tests/pipelines/test_pipelines_video_classification.py @@ -35,7 +35,7 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta class VideoClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): example_video_filepath = hf_hub_download( repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset" ) diff --git a/tests/pipelines/test_pipelines_visual_question_answering.py b/tests/pipelines/test_pipelines_visual_question_answering.py index bf3a532b10..796b39a267 100644 --- a/tests/pipelines/test_pipelines_visual_question_answering.py +++ b/tests/pipelines/test_pipelines_visual_question_answering.py @@ -36,7 +36,7 @@ else: class VisualQuestionAnsweringPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta): model_mapping = MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING - def 
get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa") examples = [ { diff --git a/tests/pipelines/test_pipelines_zero_shot.py b/tests/pipelines/test_pipelines_zero_shot.py index 5c78db1aa9..57b77d791b 100644 --- a/tests/pipelines/test_pipelines_zero_shot.py +++ b/tests/pipelines/test_pipelines_zero_shot.py @@ -30,7 +30,7 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase, metaclass=PipelineT model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): classifier = ZeroShotClassificationPipeline( model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"] ) diff --git a/tests/pipelines/test_pipelines_zero_shot_image_classification.py b/tests/pipelines/test_pipelines_zero_shot_image_classification.py index d0396f4e9a..41451f9386 100644 --- a/tests/pipelines/test_pipelines_zero_shot_image_classification.py +++ b/tests/pipelines/test_pipelines_zero_shot_image_classification.py @@ -37,7 +37,7 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase, metaclass=Pipe # and only CLIP would be there for now. 
# model_mapping = {CLIPConfig: CLIPModel} - # def get_test_pipeline(self, model, tokenizer, feature_extractor): + # def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): # if tokenizer is None: # # Side effect of no Fast Tokenizer class for these model, so skipping # # But the slow tokenizer test should still run as they're quite small diff --git a/tests/pipelines/test_pipelines_zero_shot_object_detection.py b/tests/pipelines/test_pipelines_zero_shot_object_detection.py index c48b8c381d..304a9ca020 100644 --- a/tests/pipelines/test_pipelines_zero_shot_object_detection.py +++ b/tests/pipelines/test_pipelines_zero_shot_object_detection.py @@ -36,7 +36,7 @@ class ZeroShotObjectDetectionPipelineTests(unittest.TestCase, metaclass=Pipeline model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING - def get_test_pipeline(self, model, tokenizer, feature_extractor): + def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor): object_detector = pipeline( "zero-shot-object-detection", model="hf-internal-testing/tiny-random-owlvit-object-detection" )