Supporting ImageProcessor in place of FeatureExtractor for pipelines (#20851)

* Fixing the pipeline with image processor. * Update the slow test. * Using only the first image processor. * Include exclusion mecanism for Image processor. * Do not handle Gitconfig, deemed as a bug. * Apply suggestions from code review Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Remove `conversational` changes. They are not supposed to be here. * Address first row of comments. * Remove OneFormer modifications. Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2023-01-25 10:16:31 +01:00
parent efdbad56ab
commit 99e7905422
28 changed files with 138 additions and 47 deletions
--- a/tests/pipelines/test_pipelines_audio_classification.py
+++ b/tests/pipelines/test_pipelines_audio_classification.py
@@ -27,7 +27,7 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta
 class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=feature_extractor)

        # test with a raw waveform
--- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py
+++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py
@@ -61,7 +61,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel
        + (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else [])
    }

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        if tokenizer is None:
            # Side effect of no Fast Tokenizer class for these model, so skipping
            # But the slow tokenizer test should still run as they're quite small
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -33,8 +33,10 @@ from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_
 from requests.exceptions import HTTPError
 from transformers import (
    FEATURE_EXTRACTOR_MAPPING,
+    IMAGE_PROCESSOR_MAPPING,
    TOKENIZER_MAPPING,
    AutoFeatureExtractor,
+    AutoImageProcessor,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DistilBertForSequenceClassification,
@@ -154,8 +156,6 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
                feature_extractor = None
        except Exception:
            feature_extractor = None
-    if hasattr(tiny_config, "image_size") and feature_extractor:
-        feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)

    # Audio Spectogram Transformer specific.
    if feature_extractor.__class__.__name__ == "ASTFeatureExtractor":
@@ -168,9 +168,28 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
        feature_extractor = feature_extractor.__class__(
            feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel
        )
+    # TODO remove this, once those have been moved to `image_processor`.
+    if hasattr(tiny_config, "image_size") and feature_extractor:
+        feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
    return feature_extractor


+def get_tiny_image_processor_from_checkpoint(checkpoint, tiny_config, image_processor_class):
+    try:
+        image_processor = AutoImageProcessor.from_pretrained(checkpoint)
+    except Exception:
+        try:
+            if image_processor_class is not None:
+                image_processor = image_processor_class()
+            else:
+                image_processor = None
+        except Exception:
+            image_processor = None
+    if hasattr(tiny_config, "image_size") and image_processor:
+        image_processor = image_processor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
+    return image_processor
+
+
 class ANY:
    def __init__(self, *_types):
        self._types = _types
@@ -184,7 +203,9 @@ class ANY:

 class PipelineTestCaseMeta(type):
    def __new__(mcs, name, bases, dct):
-        def gen_test(ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class):
+        def gen_test(
+            ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class, image_processor_class
+        ):
            @skipIf(
                tiny_config is None,
                "TinyConfig does not exist, make sure that you defined a `_CONFIG_FOR_DOC` variable in the modeling"
@@ -231,16 +252,21 @@ class PipelineTestCaseMeta(type):
                        self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer")
                else:
                    tokenizer = None
+
                feature_extractor = get_tiny_feature_extractor_from_checkpoint(
                    checkpoint, tiny_config, feature_extractor_class
                )

-                if tokenizer is None and feature_extractor is None:
+                image_processor = get_tiny_image_processor_from_checkpoint(
+                    checkpoint, tiny_config, image_processor_class
+                )
+
+                if tokenizer is None and feature_extractor is None and image_processor:
                    self.skipTest(
-                        f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor (PerceiverConfig with"
-                        " no FastTokenizer ?)"
+                        f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor or image_processor"
+                        " (PerceiverConfig with no FastTokenizer ?)"
                    )
-                pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor)
+                pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor, image_processor)
                if pipeline is None:
                    # The test can disable itself, but it should be very marginal
                    # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
@@ -283,6 +309,10 @@ class PipelineTestCaseMeta(type):
                        feature_extractor_name = (
                            feature_extractor_class.__name__ if feature_extractor_class else "nofeature_extractor"
                        )
+                        image_processor_class = IMAGE_PROCESSOR_MAPPING.get(configuration, None)
+                        image_processor_name = (
+                            image_processor_class.__name__ if image_processor_class else "noimage_processor"
+                        )
                        if not tokenizer_classes:
                            # We need to test even if there are no tokenizers.
                            tokenizer_classes = [None]
@@ -300,7 +330,7 @@ class PipelineTestCaseMeta(type):
                            else:
                                tokenizer_name = "notokenizer"

-                            test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}"
+                            test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}_{image_processor_name}"

                            if tokenizer_class is not None or feature_extractor_class is not None:
                                dct[test_name] = gen_test(
@@ -309,6 +339,7 @@ class PipelineTestCaseMeta(type):
                                    tiny_config,
                                    tokenizer_class,
                                    feature_extractor_class,
+                                    image_processor_class,
                                )

        @abstractmethod
--- a/tests/pipelines/test_pipelines_conversational.py
+++ b/tests/pipelines/test_pipelines_conversational.py
@@ -53,7 +53,7 @@ class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
        else []
    )

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
        return conversation_agent, [Conversation("Hi there!")]

--- a/tests/pipelines/test_pipelines_depth_estimation.py
+++ b/tests/pipelines/test_pipelines_depth_estimation.py
@@ -47,7 +47,7 @@ class DepthEstimationPipelineTests(unittest.TestCase, metaclass=PipelineTestCase

    model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        depth_estimator = DepthEstimationPipeline(model=model, feature_extractor=feature_extractor)
        return depth_estimator, [
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
--- a/tests/pipelines/test_pipelines_document_question_answering.py
+++ b/tests/pipelines/test_pipelines_document_question_answering.py
@@ -59,7 +59,7 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase, metaclass=Pipeli

    @require_pytesseract
    @require_vision
-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        dqa_pipeline = pipeline(
            "document-question-answering", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
        )
--- a/tests/pipelines/test_pipelines_feature_extraction.py
+++ b/tests/pipelines/test_pipelines_feature_extraction.py
@@ -175,7 +175,7 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
            raise ValueError("We expect lists of floats, nothing else")
        return shape

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        if tokenizer is None:
            self.skipTest("No tokenizer")
            return
--- a/tests/pipelines/test_pipelines_fill_mask.py
+++ b/tests/pipelines/test_pipelines_fill_mask.py
@@ -206,7 +206,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
        unmasker.tokenizer.pad_token = None
        self.run_pipeline_test(unmasker, [])

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        if tokenizer is None or tokenizer.mask_token_id is None:
            self.skipTest("The provided tokenizer has no mask token, (probably reformer or wav2vec2)")

--- a/tests/pipelines/test_pipelines_image_classification.py
+++ b/tests/pipelines/test_pipelines_image_classification.py
@@ -49,7 +49,7 @@ class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
    model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor, top_k=2)
        examples = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
--- a/tests/pipelines/test_pipelines_image_segmentation.py
+++ b/tests/pipelines/test_pipelines_image_segmentation.py
@@ -26,6 +26,7 @@ from transformers import (
    MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING,
    MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING,
    AutoFeatureExtractor,
+    AutoImageProcessor,
    AutoModelForImageSegmentation,
    AutoModelForInstanceSegmentation,
    DetrForSegmentation,
@@ -80,8 +81,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
        + (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else [])
    }

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
-        image_segmenter = ImageSegmentationPipeline(model=model, feature_extractor=feature_extractor)
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+        image_segmenter = ImageSegmentationPipeline(
+            model=model, feature_extractor=feature_extractor, image_processor=image_processor
+        )
        return image_segmenter, [
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
@@ -139,7 +142,11 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
            "./tests/fixtures/tests_samples/COCO/000000039769.png",
        ]
        outputs = image_segmenter(
-            batch, threshold=0.0, mask_threshold=0, overlap_mask_area_threshold=0, batch_size=batch_size
+            batch,
+            threshold=0.0,
+            mask_threshold=0,
+            overlap_mask_area_threshold=0,
+            batch_size=batch_size,
        )
        self.assertEqual(len(batch), len(outputs))
        self.assertEqual(len(outputs[0]), n)
@@ -188,10 +195,10 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
        model_id = "hf-internal-testing/tiny-detr-mobilenetsv3-panoptic"

        model = AutoModelForImageSegmentation.from_pretrained(model_id)
-        feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
+        image_processor = AutoImageProcessor.from_pretrained(model_id)
        image_segmenter = ImageSegmentationPipeline(
            model=model,
-            feature_extractor=feature_extractor,
+            image_processor=image_processor,
            subtask="panoptic",
            threshold=0.0,
            mask_threshold=0.0,
--- a/tests/pipelines/test_pipelines_image_to_text.py
+++ b/tests/pipelines/test_pipelines_image_to_text.py
@@ -36,7 +36,7 @@ class ImageToTextPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
    model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
    tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        pipe = pipeline("image-to-text", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
        examples = [
            Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
--- a/tests/pipelines/test_pipelines_object_detection.py
+++ b/tests/pipelines/test_pipelines_object_detection.py
@@ -51,7 +51,7 @@ else:
 class ObjectDetectionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)
        return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]

--- a/tests/pipelines/test_pipelines_question_answering.py
+++ b/tests/pipelines/test_pipelines_question_answering.py
@@ -31,7 +31,7 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
    tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        if isinstance(model.config, LxmertConfig):
            # This is an bimodal model, we need to find a more consistent way
            # to switch on those models.
--- a/tests/pipelines/test_pipelines_summarization.py
+++ b/tests/pipelines/test_pipelines_summarization.py
@@ -34,7 +34,7 @@ class SummarizationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMe
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer)
        return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"]

--- a/tests/pipelines/test_pipelines_text2text_generation.py
+++ b/tests/pipelines/test_pipelines_text2text_generation.py
@@ -34,7 +34,7 @@ class Text2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTest
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)
        return generator, ["Something to write", "Something else"]

--- a/tests/pipelines/test_pipelines_text_classification.py
+++ b/tests/pipelines/test_pipelines_text_classification.py
@@ -129,7 +129,7 @@ class TextClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestC
        outputs = text_classifier("Birds are a type of animal")
        self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
        return text_classifier, ["HuggingFace is in", "This is another test"]

--- a/tests/pipelines/test_pipelines_text_generation.py
+++ b/tests/pipelines/test_pipelines_text_generation.py
@@ -143,7 +143,7 @@ class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
            ],
        )

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
        return text_generator, ["This is a test", "Another test"]

--- a/tests/pipelines/test_pipelines_token_classification.py
+++ b/tests/pipelines/test_pipelines_token_classification.py
@@ -37,7 +37,7 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
    model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
        return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]

--- a/tests/pipelines/test_pipelines_translation.py
+++ b/tests/pipelines/test_pipelines_translation.py
@@ -34,7 +34,7 @@ class TranslationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
    model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        if isinstance(model.config, MBartConfig):
            src_lang, tgt_lang = list(tokenizer.lang_code_to_id.keys())[:2]
            translator = TranslationPipeline(model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang)
--- a/tests/pipelines/test_pipelines_video_classification.py
+++ b/tests/pipelines/test_pipelines_video_classification.py
@@ -35,7 +35,7 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta
 class VideoClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        example_video_filepath = hf_hub_download(
            repo_id="nateraw/video-demo", filename="archery.mp4", repo_type="dataset"
        )
--- a/tests/pipelines/test_pipelines_visual_question_answering.py
+++ b/tests/pipelines/test_pipelines_visual_question_answering.py
@@ -36,7 +36,7 @@ else:
 class VisualQuestionAnsweringPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
    model_mapping = MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        vqa_pipeline = pipeline("visual-question-answering", model="hf-internal-testing/tiny-vilt-random-vqa")
        examples = [
            {
--- a/tests/pipelines/test_pipelines_zero_shot.py
+++ b/tests/pipelines/test_pipelines_zero_shot.py
@@ -30,7 +30,7 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase, metaclass=PipelineT
    model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
    tf_model_mapping = TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        classifier = ZeroShotClassificationPipeline(
            model=model, tokenizer=tokenizer, candidate_labels=["polics", "health"]
        )
--- a/tests/pipelines/test_pipelines_zero_shot_image_classification.py
+++ b/tests/pipelines/test_pipelines_zero_shot_image_classification.py
@@ -37,7 +37,7 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase, metaclass=Pipe
    # and only CLIP would be there for now.
    # model_mapping = {CLIPConfig: CLIPModel}

-    # def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    # def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
    #     if tokenizer is None:
    #         # Side effect of no Fast Tokenizer class for these model, so skipping
    #         # But the slow tokenizer test should still run as they're quite small
--- a/tests/pipelines/test_pipelines_zero_shot_object_detection.py
+++ b/tests/pipelines/test_pipelines_zero_shot_object_detection.py
@@ -36,7 +36,7 @@ class ZeroShotObjectDetectionPipelineTests(unittest.TestCase, metaclass=Pipeline

    model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING

-    def get_test_pipeline(self, model, tokenizer, feature_extractor):
+    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
        object_detector = pipeline(
            "zero-shot-object-detection", model="hf-internal-testing/tiny-random-owlvit-object-detection"
        )