From e6de91867638cc17b627418c9203de99ab1c4abc Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Thu, 2 Mar 2023 18:20:34 +0100
Subject: [PATCH] Add Blip and Blip2 for pipeline tests (#21904)

* fix

* add to tests

* style and quality

* add missing

---------

Co-authored-by: NielsRogge <NielsRogge@users.noreply.github.com>
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
---
 src/transformers/models/auto/modeling_auto.py |  2 ++
 tests/models/blip/test_modeling_blip.py       |  6 +++-
 tests/models/blip_2/test_modeling_blip_2.py   | 12 +++++--
 tests/utils/tiny_model_summary.json           | 36 +++++++++++++++++++
 utils/create_dummy_models.py                  | 17 +++++++--
 5 files changed, 66 insertions(+), 7 deletions(-)

diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py
index 2e2a53d6f1..be30298650 100755
--- a/src/transformers/models/auto/modeling_auto.py
+++ b/src/transformers/models/auto/modeling_auto.py
@@ -496,6 +496,8 @@ MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES = OrderedDict(
 
 MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = OrderedDict(
     [
+        ("blip", "BlipForConditionalGeneration"),
+        ("blip-2", "Blip2ForConditionalGeneration"),
         ("vision-encoder-decoder", "VisionEncoderDecoderModel"),
     ]
 )
diff --git a/tests/models/blip/test_modeling_blip.py b/tests/models/blip/test_modeling_blip.py
index cc5915a73b..52085d04af 100644
--- a/tests/models/blip/test_modeling_blip.py
+++ b/tests/models/blip/test_modeling_blip.py
@@ -394,7 +394,11 @@ class BlipModelTester:
 @require_torch
 class BlipModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (BlipModel,) if is_torch_available() else ()
-    pipeline_model_mapping = {"feature-extraction": BlipModel} if is_torch_available() else {}
+    pipeline_model_mapping = (
+        {"feature-extraction": BlipModel, "image-to-text": BlipForConditionalGeneration}
+        if is_torch_available()
+        else {}
+    )
     fx_compatible = False
     test_head_masking = False
     test_pruning = False
diff --git a/tests/models/blip_2/test_modeling_blip_2.py b/tests/models/blip_2/test_modeling_blip_2.py
index 71bba12c28..302feb87a1 100644
--- a/tests/models/blip_2/test_modeling_blip_2.py
+++ b/tests/models/blip_2/test_modeling_blip_2.py
@@ -34,6 +34,7 @@ from ...test_modeling_common import (
     ids_tensor,
     random_attention_mask,
 )
+from ...test_pipeline_mixin import PipelineTesterMixin
 
 
 if is_torch_available():
@@ -584,7 +585,7 @@ class Blip2TextModelTester:
 
 
 # this model tester uses an encoder-decoder language model (T5)
-class Blip2ForConditionalGenerationModelTester:
+class Blip2ModelTester:
     def __init__(
         self, parent, vision_kwargs=None, qformer_kwargs=None, text_kwargs=None, is_training=True, num_query_tokens=10
     ):
@@ -664,8 +665,13 @@ class Blip2ForConditionalGenerationModelTester:
 
 
 @require_torch
-class Blip2ModelTest(ModelTesterMixin, unittest.TestCase):
+class Blip2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     all_model_classes = (Blip2ForConditionalGeneration, Blip2Model) if is_torch_available() else ()
+    pipeline_model_mapping = (
+        {"feature-extraction": Blip2Model, "image-to-text": Blip2ForConditionalGeneration}
+        if is_torch_available()
+        else {}
+    )
     fx_compatible = False
     test_head_masking = False
     test_pruning = False
@@ -674,7 +680,7 @@ class Blip2ModelTest(ModelTesterMixin, unittest.TestCase):
     test_torchscript = False
 
     def setUp(self):
-        self.model_tester = Blip2ForConditionalGenerationModelTester(self)
+        self.model_tester = Blip2ModelTester(self)
 
     def test_for_conditional_generation(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
diff --git a/tests/utils/tiny_model_summary.json b/tests/utils/tiny_model_summary.json
index f379590abc..d2d2ff2146 100644
--- a/tests/utils/tiny_model_summary.json
+++ b/tests/utils/tiny_model_summary.json
@@ -55,6 +55,42 @@
         ],
         "processor_classes": []
     },
+    "BlipModel": {
+        "tokenizer_classes": [
+            "BertTokenizerFast",
+            "BertTokenizer"
+        ],
+        "processor_classes": [
+            "BlipImageProcessor"
+        ]
+    },
+    "BlipForConditionalGeneration": {
+        "tokenizer_classes": [
+            "BertTokenizerFast",
+            "BertTokenizer"
+        ],
+        "processor_classes": [
+            "BlipImageProcessor"
+        ]
+    },
+    "Blip2Model": {
+        "tokenizer_classes": [
+            "GPT2TokenizerFast",
+            "GPT2Tokenizer"
+        ],
+        "processor_classes": [
+            "BlipImageProcessor"
+        ]
+    },
+    "Blip2ForConditionalGeneration": {
+        "tokenizer_classes": [
+            "GPT2TokenizerFast",
+            "GPT2Tokenizer"
+        ],
+        "processor_classes": [
+            "BlipImageProcessor"
+        ]
+    },
     "BloomModel": {
         "tokenizer_classes": [
             "BloomTokenizerFast"
diff --git a/utils/create_dummy_models.py b/utils/create_dummy_models.py
index 162a310c65..9d8c93a762 100644
--- a/utils/create_dummy_models.py
+++ b/utils/create_dummy_models.py
@@ -410,7 +410,10 @@ def convert_processors(processors, tiny_config, output_folder, result):
         elif isinstance(processor, ProcessorMixin):
             # Currently, we only have these 2 possibilities
             tokenizers.append(processor.tokenizer)
-            feature_extractors.append(processor.feature_extractor)
+            if hasattr(processor, "image_processor"):
+                feature_extractors.append(processor.image_processor)
+            elif hasattr(processor, "feature_extractor"):
+                feature_extractors.append(processor.feature_extractor)
 
     # check the built processors have the unique type
     num_types = len({x.__class__.__name__ for x in feature_extractors})
@@ -557,7 +560,7 @@ def upload_model(model_dir, organization):
     repo_exist = False
     error = None
     try:
-        create_repo(repo_id=repo_name, organization=organization, exist_ok=False, repo_type="model")
+        create_repo(repo_id=f"{organization}/{repo_name}", exist_ok=False, repo_type="model")
     except Exception as e:
         error = e
         if "You already created" in str(e):
@@ -778,7 +781,15 @@ def get_config_overrides(config_class, processors):
     model_tester_kwargs = {"vocab_size": vocab_size}
     # CLIP-like models have `text_model_tester` and `vision_model_tester`, and we need to pass `vocab_size` to
     # `text_model_tester` via `text_kwargs`. The same trick is also necessary for `Flava`.
-    if config_class.__name__ in ["CLIPConfig", "GroupViTConfig", "OwlViTConfig", "XCLIPConfig", "FlavaConfig"]:
+    if config_class.__name__ in [
+        "CLIPConfig",
+        "GroupViTConfig",
+        "OwlViTConfig",
+        "XCLIPConfig",
+        "FlavaConfig",
+        "BlipConfig",
+        "Blip2Config",
+    ]:
         del model_tester_kwargs["vocab_size"]
         model_tester_kwargs["text_kwargs"] = {"vocab_size": vocab_size}
     # `FSMTModelTester` accepts `src_vocab_size` and `tgt_vocab_size` but not `vocab_size`.