Supporting ImageProcessor in place of FeatureExtractor for pipelines (#20851)

* Fixing the pipeline with image processor. * Update the slow test. * Using only the first image processor. * Include exclusion mecanism for Image processor. * Do not handle Gitconfig, deemed as a bug. * Apply suggestions from code review Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com> * Remove `conversational` changes. They are not supposed to be here. * Address first row of comments. * Remove OneFormer modifications. Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
2023-01-25 10:16:31 +01:00
parent efdbad56ab
commit 99e7905422
28 changed files with 138 additions and 47 deletions
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -33,8 +33,10 @@ from huggingface_hub import HfFolder, Repository, create_repo, delete_repo, set_
 from requests.exceptions import HTTPError
 from transformers import (
    FEATURE_EXTRACTOR_MAPPING,
+    IMAGE_PROCESSOR_MAPPING,
    TOKENIZER_MAPPING,
    AutoFeatureExtractor,
+    AutoImageProcessor,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DistilBertForSequenceClassification,
@@ -154,8 +156,6 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
                feature_extractor = None
        except Exception:
            feature_extractor = None
-    if hasattr(tiny_config, "image_size") and feature_extractor:
-        feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)

    # Audio Spectogram Transformer specific.
    if feature_extractor.__class__.__name__ == "ASTFeatureExtractor":
@@ -168,9 +168,28 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config, feature_
        feature_extractor = feature_extractor.__class__(
            feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel
        )
+    # TODO remove this, once those have been moved to `image_processor`.
+    if hasattr(tiny_config, "image_size") and feature_extractor:
+        feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
    return feature_extractor


+def get_tiny_image_processor_from_checkpoint(checkpoint, tiny_config, image_processor_class):
+    try:
+        image_processor = AutoImageProcessor.from_pretrained(checkpoint)
+    except Exception:
+        try:
+            if image_processor_class is not None:
+                image_processor = image_processor_class()
+            else:
+                image_processor = None
+        except Exception:
+            image_processor = None
+    if hasattr(tiny_config, "image_size") and image_processor:
+        image_processor = image_processor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
+    return image_processor
+
+
 class ANY:
    def __init__(self, *_types):
        self._types = _types
@@ -184,7 +203,9 @@ class ANY:

 class PipelineTestCaseMeta(type):
    def __new__(mcs, name, bases, dct):
-        def gen_test(ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class):
+        def gen_test(
+            ModelClass, checkpoint, tiny_config, tokenizer_class, feature_extractor_class, image_processor_class
+        ):
            @skipIf(
                tiny_config is None,
                "TinyConfig does not exist, make sure that you defined a `_CONFIG_FOR_DOC` variable in the modeling"
@@ -231,16 +252,21 @@ class PipelineTestCaseMeta(type):
                        self.skipTest(f"Ignoring {ModelClass}, cannot create a simple tokenizer")
                else:
                    tokenizer = None
+
                feature_extractor = get_tiny_feature_extractor_from_checkpoint(
                    checkpoint, tiny_config, feature_extractor_class
                )

-                if tokenizer is None and feature_extractor is None:
+                image_processor = get_tiny_image_processor_from_checkpoint(
+                    checkpoint, tiny_config, image_processor_class
+                )
+
+                if tokenizer is None and feature_extractor is None and image_processor:
                    self.skipTest(
-                        f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor (PerceiverConfig with"
-                        " no FastTokenizer ?)"
+                        f"Ignoring {ModelClass}, cannot create a tokenizer or feature_extractor or image_processor"
+                        " (PerceiverConfig with no FastTokenizer ?)"
                    )
-                pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor)
+                pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor, image_processor)
                if pipeline is None:
                    # The test can disable itself, but it should be very marginal
                    # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
@@ -283,6 +309,10 @@ class PipelineTestCaseMeta(type):
                        feature_extractor_name = (
                            feature_extractor_class.__name__ if feature_extractor_class else "nofeature_extractor"
                        )
+                        image_processor_class = IMAGE_PROCESSOR_MAPPING.get(configuration, None)
+                        image_processor_name = (
+                            image_processor_class.__name__ if image_processor_class else "noimage_processor"
+                        )
                        if not tokenizer_classes:
                            # We need to test even if there are no tokenizers.
                            tokenizer_classes = [None]
@@ -300,7 +330,7 @@ class PipelineTestCaseMeta(type):
                            else:
                                tokenizer_name = "notokenizer"

-                            test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}"
+                            test_name = f"test_{prefix}_{configuration.__name__}_{model_architecture.__name__}_{tokenizer_name}_{feature_extractor_name}_{image_processor_name}"

                            if tokenizer_class is not None or feature_extractor_class is not None:
                                dct[test_name] = gen_test(
@@ -309,6 +339,7 @@ class PipelineTestCaseMeta(type):
                                    tiny_config,
                                    tokenizer_class,
                                    feature_extractor_class,
+                                    image_processor_class,
                                )

        @abstractmethod