From e6de91867638cc17b627418c9203de99ab1c4abc Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Thu, 2 Mar 2023 18:20:34 +0100 Subject: [PATCH] Add Blip and Blip2 for pipeline tests (#21904) * fix * add to tests * style and quality * add missing --------- Co-authored-by: NielsRogge Co-authored-by: ydshieh --- src/transformers/models/auto/modeling_auto.py | 2 ++ tests/models/blip/test_modeling_blip.py | 6 +++- tests/models/blip_2/test_modeling_blip_2.py | 12 +++++-- tests/utils/tiny_model_summary.json | 36 +++++++++++++++++++ utils/create_dummy_models.py | 17 +++++++-- 5 files changed, 66 insertions(+), 7 deletions(-) diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index 2e2a53d6f1..be30298650 100755 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -496,6 +496,8 @@ MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES = OrderedDict( MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = OrderedDict( [ + ("blip", "BlipForConditionalGeneration"), + ("blip-2", "Blip2ForConditionalGeneration"), ("vision-encoder-decoder", "VisionEncoderDecoderModel"), ] ) diff --git a/tests/models/blip/test_modeling_blip.py b/tests/models/blip/test_modeling_blip.py index cc5915a73b..52085d04af 100644 --- a/tests/models/blip/test_modeling_blip.py +++ b/tests/models/blip/test_modeling_blip.py @@ -394,7 +394,11 @@ class BlipModelTester: @require_torch class BlipModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = (BlipModel,) if is_torch_available() else () - pipeline_model_mapping = {"feature-extraction": BlipModel} if is_torch_available() else {} + pipeline_model_mapping = ( + {"feature-extraction": BlipModel, "image-to-text": BlipForConditionalGeneration} + if is_torch_available() + else {} + ) fx_compatible = False test_head_masking = False test_pruning = False diff --git a/tests/models/blip_2/test_modeling_blip_2.py b/tests/models/blip_2/test_modeling_blip_2.py index 71bba12c28..302feb87a1 100644 --- a/tests/models/blip_2/test_modeling_blip_2.py +++ b/tests/models/blip_2/test_modeling_blip_2.py @@ -34,6 +34,7 @@ from ...test_modeling_common import ( ids_tensor, random_attention_mask, ) +from ...test_pipeline_mixin import PipelineTesterMixin if is_torch_available(): @@ -584,7 +585,7 @@ class Blip2TextModelTester: # this model tester uses an encoder-decoder language model (T5) -class Blip2ForConditionalGenerationModelTester: +class Blip2ModelTester: def __init__( self, parent, vision_kwargs=None, qformer_kwargs=None, text_kwargs=None, is_training=True, num_query_tokens=10 ): @@ -664,8 +665,13 @@ class Blip2ForConditionalGenerationModelTester: @require_torch -class Blip2ModelTest(ModelTesterMixin, unittest.TestCase): +class Blip2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = (Blip2ForConditionalGeneration, Blip2Model) if is_torch_available() else () + pipeline_model_mapping = ( + {"feature-extraction": Blip2Model, "image-to-text": Blip2ForConditionalGeneration} + if is_torch_available() + else {} + ) fx_compatible = False test_head_masking = False test_pruning = False @@ -674,7 +680,7 @@ class Blip2ModelTest(ModelTesterMixin, unittest.TestCase): test_torchscript = False def setUp(self): - self.model_tester = Blip2ForConditionalGenerationModelTester(self) + self.model_tester = Blip2ModelTester(self) def test_for_conditional_generation(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() diff --git a/tests/utils/tiny_model_summary.json b/tests/utils/tiny_model_summary.json index f379590abc..d2d2ff2146 100644 --- a/tests/utils/tiny_model_summary.json +++ b/tests/utils/tiny_model_summary.json @@ -55,6 +55,42 @@ ], "processor_classes": [] }, + "BlipModel": { + "tokenizer_classes": [ + "BertTokenizerFast", + "BertTokenizer" + ], + "processor_classes": [ + "BlipImageProcessor" + ] + }, + "BlipForConditionalGeneration": { + "tokenizer_classes": [ + "BertTokenizerFast", + "BertTokenizer" + ], + "processor_classes": [ + "BlipImageProcessor" + ] + }, + "Blip2Model": { + "tokenizer_classes": [ + "GPT2TokenizerFast", + "GPT2Tokenizer" + ], + "processor_classes": [ + "BlipImageProcessor" + ] + }, + "Blip2ForConditionalGeneration": { + "tokenizer_classes": [ + "GPT2TokenizerFast", + "GPT2Tokenizer" + ], + "processor_classes": [ + "BlipImageProcessor" + ] + }, "BloomModel": { "tokenizer_classes": [ "BloomTokenizerFast" diff --git a/utils/create_dummy_models.py b/utils/create_dummy_models.py index 162a310c65..9d8c93a762 100644 --- a/utils/create_dummy_models.py +++ b/utils/create_dummy_models.py @@ -410,7 +410,10 @@ def convert_processors(processors, tiny_config, output_folder, result): elif isinstance(processor, ProcessorMixin): # Currently, we only have these 2 possibilities tokenizers.append(processor.tokenizer) - feature_extractors.append(processor.feature_extractor) + if hasattr(processor, "image_processor"): + feature_extractors.append(processor.image_processor) + elif hasattr(processor, "feature_extractor"): + feature_extractors.append(processor.feature_extractor) # check the built processors have the unique type num_types = len({x.__class__.__name__ for x in feature_extractors}) @@ -557,7 +560,7 @@ def upload_model(model_dir, organization): repo_exist = False error = None try: - create_repo(repo_id=repo_name, organization=organization, exist_ok=False, repo_type="model") + create_repo(repo_id=f"{organization}/{repo_name}", exist_ok=False, repo_type="model") except Exception as e: error = e if "You already created" in str(e): @@ -778,7 +781,15 @@ def get_config_overrides(config_class, processors): model_tester_kwargs = {"vocab_size": vocab_size} # CLIP-like models have `text_model_tester` and `vision_model_tester`, and we need to pass `vocab_size` to # `text_model_tester` via `text_kwargs`. The same trick is also necessary for `Flava`. - if config_class.__name__ in ["CLIPConfig", "GroupViTConfig", "OwlViTConfig", "XCLIPConfig", "FlavaConfig"]: + if config_class.__name__ in [ + "CLIPConfig", + "GroupViTConfig", + "OwlViTConfig", + "XCLIPConfig", + "FlavaConfig", + "BlipConfig", + "Blip2Config", + ]: del model_tester_kwargs["vocab_size"] model_tester_kwargs["text_kwargs"] = {"vocab_size": vocab_size} # `FSMTModelTester` accepts `src_vocab_size` and `tgt_vocab_size` but not `vocab_size`.