From b8db265bc6d0c9208ee465a12c6497149b4ee725 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Thu, 23 Nov 2023 21:00:39 +0100 Subject: [PATCH] Update tiny model summary file (#27388) * update * fix --------- Co-authored-by: ydshieh --- .../models/auto/image_processing_auto.py | 1 + tests/models/clvp/test_modeling_clvp.py | 4 +- tests/models/fuyu/test_modeling_fuyu.py | 5 +- tests/models/kosmos2/test_modeling_kosmos2.py | 14 +- .../test_modeling_seamless_m4t.py | 18 +- tests/models/swin2sr/test_modeling_swin2sr.py | 6 +- tests/models/whisper/test_modeling_whisper.py | 1 + .../test_pipelines_text_generation.py | 7 +- tests/utils/tiny_model_summary.json | 231 +++++++++++++++++- 9 files changed, 277 insertions(+), 10 deletions(-) diff --git a/src/transformers/models/auto/image_processing_auto.py b/src/transformers/models/auto/image_processing_auto.py index 168b7a5dff..7d26d668ab 100644 --- a/src/transformers/models/auto/image_processing_auto.py +++ b/src/transformers/models/auto/image_processing_auto.py @@ -71,6 +71,7 @@ IMAGE_PROCESSOR_MAPPING_NAMES = OrderedDict( ("idefics", "IdeficsImageProcessor"), ("imagegpt", "ImageGPTImageProcessor"), ("instructblip", "BlipImageProcessor"), + ("kosmos-2", "CLIPImageProcessor"), ("layoutlmv2", "LayoutLMv2ImageProcessor"), ("layoutlmv3", "LayoutLMv3ImageProcessor"), ("levit", "LevitImageProcessor"), diff --git a/tests/models/clvp/test_modeling_clvp.py b/tests/models/clvp/test_modeling_clvp.py index 1b3ab79034..3ebe5fe357 100644 --- a/tests/models/clvp/test_modeling_clvp.py +++ b/tests/models/clvp/test_modeling_clvp.py @@ -38,6 +38,7 @@ from ...test_modeling_common import ( ids_tensor, random_attention_mask, ) +from ...test_pipeline_mixin import PipelineTesterMixin if is_torch_available(): @@ -281,9 +282,10 @@ class ClvpDecoderTester: @require_torch -class ClvpDecoderTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase): +class ClvpDecoderTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = (ClvpModel, ClvpForCausalLM) if is_torch_available() else () all_generative_model_classes = (ClvpForCausalLM,) if is_torch_available() else () + pipeline_model_mapping = {"feature-extraction": ClvpModelForConditionalGeneration} if is_torch_available() else {} test_pruning = False diff --git a/tests/models/fuyu/test_modeling_fuyu.py b/tests/models/fuyu/test_modeling_fuyu.py index d475e1e0ca..84c9128892 100644 --- a/tests/models/fuyu/test_modeling_fuyu.py +++ b/tests/models/fuyu/test_modeling_fuyu.py @@ -24,6 +24,7 @@ from transformers.testing_utils import require_torch, require_torch_gpu, slow, t from transformers.utils import cached_property from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask +from ...test_pipeline_mixin import PipelineTesterMixin if is_vision_available(): @@ -262,9 +263,9 @@ class FuyuModelTester: @require_torch -class FuyuModelTest(ModelTesterMixin, unittest.TestCase): +class FuyuModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = (FuyuForCausalLM,) if is_torch_available() else () - pipeline_model_mapping = {"image-to-text": FuyuForCausalLM} if is_torch_available() else {} + pipeline_model_mapping = {"text-generation": FuyuForCausalLM} if is_torch_available() else {} test_head_masking = False test_pruning = False diff --git a/tests/models/kosmos2/test_modeling_kosmos2.py b/tests/models/kosmos2/test_modeling_kosmos2.py index 5491ded1bc..dd953eedc8 100644 --- a/tests/models/kosmos2/test_modeling_kosmos2.py +++ b/tests/models/kosmos2/test_modeling_kosmos2.py @@ -37,6 +37,7 @@ from ...test_modeling_common import ( ids_tensor, random_attention_mask, ) +from ...test_pipeline_mixin import PipelineTesterMixin if is_torch_available(): @@ -244,15 +245,26 @@ class Kosmos2ModelTester: @require_torch -class Kosmos2ModelTest(ModelTesterMixin, unittest.TestCase): +class Kosmos2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = (Kosmos2Model, Kosmos2ForConditionalGeneration) if is_torch_available() else () all_generative_model_classes = (Kosmos2ForConditionalGeneration,) if is_torch_available() else () + pipeline_model_mapping = ( + {"feature-extraction": Kosmos2Model, "image-to-text": Kosmos2ForConditionalGeneration} + if is_torch_available() + else {} + ) fx_compatible = False test_head_masking = False test_pruning = False test_resize_embeddings = False test_attention_outputs = False + # TODO: `image-to-text` pipeline for this model needs Processor. + def is_pipeline_test_to_skip( + self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name + ): + return pipeline_test_casse_name == "ImageToTextPipelineTests" + def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): inputs_dict = copy.deepcopy(inputs_dict) diff --git a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py index ab7a48694d..4eb4c7359f 100644 --- a/tests/models/seamless_m4t/test_modeling_seamless_m4t.py +++ b/tests/models/seamless_m4t/test_modeling_seamless_m4t.py @@ -34,6 +34,7 @@ from ...test_modeling_common import ( ids_tensor, random_attention_mask, ) +from ...test_pipeline_mixin import PipelineTesterMixin if is_torch_available(): @@ -616,7 +617,9 @@ class SeamlessM4TModelWithSpeechInputTest(ModelTesterMixin, unittest.TestCase): @require_torch -class SeamlessM4TModelWithTextInputTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase): +class SeamlessM4TModelWithTextInputTest( + ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase +): is_encoder_decoder = True fx_compatible = False test_missing_keys = False @@ -636,6 +639,19 @@ class SeamlessM4TModelWithTextInputTest(ModelTesterMixin, GenerationTesterMixin, else () ) all_generative_model_classes = (SeamlessM4TForTextToText,) if is_torch_available() else () + pipeline_model_mapping = ( + { + "automatic-speech-recognition": SeamlessM4TForSpeechToText, + "conversational": SeamlessM4TForTextToText, + "feature-extraction": SeamlessM4TModel, + "summarization": SeamlessM4TForTextToText, + "text-to-audio": SeamlessM4TForTextToSpeech, + "text2text-generation": SeamlessM4TForTextToText, + "translation": SeamlessM4TForTextToText, + } + if is_torch_available() + else {} + ) def setUp(self): self.model_tester = SeamlessM4TModelTester(self, input_modality="text") diff --git a/tests/models/swin2sr/test_modeling_swin2sr.py b/tests/models/swin2sr/test_modeling_swin2sr.py index f94e11ad64..7306896036 100644 --- a/tests/models/swin2sr/test_modeling_swin2sr.py +++ b/tests/models/swin2sr/test_modeling_swin2sr.py @@ -162,7 +162,11 @@ class Swin2SRModelTester: @require_torch class Swin2SRModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = (Swin2SRModel, Swin2SRForImageSuperResolution) if is_torch_available() else () - pipeline_model_mapping = {"feature-extraction": Swin2SRModel} if is_torch_available() else {} + pipeline_model_mapping = ( + {"feature-extraction": Swin2SRModel, "image-to-image": Swin2SRForImageSuperResolution} + if is_torch_available() + else {} + ) fx_compatible = False test_pruning = False diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py index f77d81d76e..6f01cfdac2 100644 --- a/tests/models/whisper/test_modeling_whisper.py +++ b/tests/models/whisper/test_modeling_whisper.py @@ -367,6 +367,7 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi "audio-classification": WhisperForAudioClassification, "automatic-speech-recognition": WhisperForConditionalGeneration, "feature-extraction": WhisperModel, + "text-generation": WhisperForCausalLM, } if is_torch_available() else {} diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py index b9a5febb56..dc77204f3e 100644 --- a/tests/pipelines/test_pipelines_text_generation.py +++ b/tests/pipelines/test_pipelines_text_generation.py @@ -242,7 +242,12 @@ class TextGenerationPipelineTests(unittest.TestCase): # We don't care about infinite range models. # They already work. # Skip this test for XGLM, since it uses sinusoidal positional embeddings which are resized on-the-fly. - EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS = ["RwkvForCausalLM", "XGLMForCausalLM", "GPTNeoXForCausalLM"] + EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS = [ + "RwkvForCausalLM", + "XGLMForCausalLM", + "GPTNeoXForCausalLM", + "FuyuForCausalLM", + ] if ( tokenizer.model_max_length < 10000 and text_generator.model.__class__.__name__ not in EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS diff --git a/tests/utils/tiny_model_summary.json b/tests/utils/tiny_model_summary.json index 2a1efa7d88..5f2c6c0b4e 100644 --- a/tests/utils/tiny_model_summary.json +++ b/tests/utils/tiny_model_summary.json @@ -877,6 +877,16 @@ ], "sha": "a7874595b900f9b2ddc79130dafc3ff48f4fbfb9" }, + "ClvpModelForConditionalGeneration": { + "tokenizer_classes": [ + "ClvpTokenizer" + ], + "processor_classes": [ + "ClvpFeatureExtractor" + ], + "model_classes": [], + "sha": "45df7581535be337ff781707b6c20994ca221f05" + }, "CodeGenForCausalLM": { "tokenizer_classes": [ "CodeGenTokenizer", @@ -1039,7 +1049,8 @@ "ConvNextImageProcessor" ], "model_classes": [ - "ConvNextV2ForImageClassification" + "ConvNextV2ForImageClassification", + "TFConvNextV2ForImageClassification" ], "sha": "ee22bae1cbb87d66fc7f62f7e15a43d6ff80d3cc" }, @@ -1049,7 +1060,8 @@ "ConvNextImageProcessor" ], "model_classes": [ - "ConvNextV2Model" + "ConvNextV2Model", + "TFConvNextV2Model" ], "sha": "c4dd68ee1102cba05bcc483da2a88e39427b7249" }, @@ -2136,6 +2148,56 @@ ], "sha": "683f6f73a2ab87801f1695a72d1af63cf173ab7c" }, + "FalconForCausalLM": { + "tokenizer_classes": [ + "PreTrainedTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "FalconForCausalLM" + ], + "sha": "60076d5dafc5e33ba9c90dcd05e7c0834e44049a" + }, + "FalconForQuestionAnswering": { + "tokenizer_classes": [ + "PreTrainedTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "FalconForQuestionAnswering" + ], + "sha": "b1ee9cd5fad2d177ea5a46df4611cd02f66ae788" + }, + "FalconForSequenceClassification": { + "tokenizer_classes": [ + "PreTrainedTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "FalconForSequenceClassification" + ], + "sha": "007838c0991c2b6a87dc49a8a5c20f29149a00fa" + }, + "FalconForTokenClassification": { + "tokenizer_classes": [ + "PreTrainedTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "FalconForTokenClassification" + ], + "sha": "0ea6ae548773daa6e3317fddc058957e956eebf4" + }, + "FalconModel": { + "tokenizer_classes": [ + "PreTrainedTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "FalconModel" + ], + "sha": "ca15a579c946eb00c5b39cc8e0ea63d0c1460f84" + }, "FlaubertForMultipleChoice": { "tokenizer_classes": [ "FlaubertTokenizer" @@ -2364,6 +2426,18 @@ ], "sha": "bfbaa8fa21c3abf80b94e7168b5ecff8ec5b5f76" }, + "FuyuForCausalLM": { + "tokenizer_classes": [ + "LlamaTokenizerFast" + ], + "processor_classes": [ + "FuyuImageProcessor" + ], + "model_classes": [ + "FuyuForCausalLM" + ], + "sha": "685d78258ea95c5c82e0e4555d0d4a2270ab8bff" + }, "GLPNForDepthEstimation": { "tokenizer_classes": [], "processor_classes": [ @@ -2866,6 +2940,30 @@ ], "sha": "5a7983e48d5841704733dd0756177680ed50c074" }, + "Kosmos2ForConditionalGeneration": { + "tokenizer_classes": [ + "XLMRobertaTokenizerFast" + ], + "processor_classes": [ + "CLIPImageProcessor" + ], + "model_classes": [ + "Kosmos2ForConditionalGeneration" + ], + "sha": "d1d4607782b911411676f1ee79997dee645def58" + }, + "Kosmos2Model": { + "tokenizer_classes": [ + "XLMRobertaTokenizerFast" + ], + "processor_classes": [ + "CLIPImageProcessor" + ], + "model_classes": [ + "Kosmos2Model" + ], + "sha": "379d8944a65312094d9ab1c4b8a82058a2d3274e" + }, "LEDForConditionalGeneration": { "tokenizer_classes": [ "LEDTokenizer", @@ -3820,6 +3918,39 @@ ], "sha": "f197d5bfa1fe27b5f28a6e6d4e3ad229b753450a" }, + "MistralForCausalLM": { + "tokenizer_classes": [ + "LlamaTokenizer", + "LlamaTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "MistralForCausalLM" + ], + "sha": "f7e06aeedbba8f4f665b438b868ed932d451f64b" + }, + "MistralForSequenceClassification": { + "tokenizer_classes": [ + "LlamaTokenizer", + "LlamaTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "MistralForSequenceClassification" + ], + "sha": "65045444ea1933309270d8b08b21d3fa94a84290" + }, + "MistralModel": { + "tokenizer_classes": [ + "LlamaTokenizer", + "LlamaTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "MistralModel" + ], + "sha": "becd727ad72b1e8a7c0fa0ea39b61904fa68aeac" + }, "MobileBertForMaskedLM": { "tokenizer_classes": [ "MobileBertTokenizer", @@ -4558,6 +4689,32 @@ ], "sha": "f0e27b2b4e53ba70e05d13dcfea8e85272b292a5" }, + "Owlv2ForObjectDetection": { + "tokenizer_classes": [ + "CLIPTokenizer", + "CLIPTokenizerFast" + ], + "processor_classes": [ + "Owlv2ImageProcessor" + ], + "model_classes": [ + "Owlv2ForObjectDetection" + ], + "sha": "30439c0b2749726468dc13a755261e8101170052" + }, + "Owlv2Model": { + "tokenizer_classes": [ + "CLIPTokenizer", + "CLIPTokenizerFast" + ], + "processor_classes": [ + "Owlv2ImageProcessor" + ], + "model_classes": [ + "Owlv2Model" + ], + "sha": "7aeebdad5f72b36cb07c74355afad8e6052e2377" + }, "PLBartForCausalLM": { "tokenizer_classes": [ "PLBartTokenizer" @@ -4760,6 +4917,50 @@ ], "sha": "b8c8d479e29e9ee048e2d0b05b001ac835ad8859" }, + "PhiForCausalLM": { + "tokenizer_classes": [ + "CodeGenTokenizer", + "CodeGenTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "PhiForCausalLM" + ], + "sha": "3fecc0109a4a3a230e3a5509eaf47a26eba85d79" + }, + "PhiForSequenceClassification": { + "tokenizer_classes": [ + "CodeGenTokenizer", + "CodeGenTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "PhiForSequenceClassification" + ], + "sha": "e1c9f8ebf1317516acc1cd6338de71a53e770245" + }, + "PhiForTokenClassification": { + "tokenizer_classes": [ + "CodeGenTokenizer", + "CodeGenTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "PhiForTokenClassification" + ], + "sha": "d3a8054903753b5c96c05eaf9877905a116a1d5e" + }, + "PhiModel": { + "tokenizer_classes": [ + "CodeGenTokenizer", + "CodeGenTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "PhiModel" + ], + "sha": "99c38d5ce7ace35127d00ed3eeb3561308ea6b21" + }, "Pix2StructForConditionalGeneration": { "tokenizer_classes": [ "T5TokenizerFast" @@ -4768,7 +4969,9 @@ "Pix2StructImageProcessor", "Pix2StructProcessor" ], - "model_classes": [], + "model_classes": [ + "Pix2StructForConditionalGeneration" + ], "sha": "42b3de00ad535076c4893e4ac5ae2d2748cc4ccb" }, "PoolFormerForImageClassification": { @@ -5691,6 +5894,16 @@ ], "sha": "25ba2d88c770533f8c69811d2a454a00c1d09f5d" }, + "Swin2SRForImageSuperResolution": { + "tokenizer_classes": [], + "processor_classes": [ + "Swin2SRImageProcessor" + ], + "model_classes": [ + "Swin2SRForImageSuperResolution" + ], + "sha": "3a2780de0b455084c018ac8a62b56040969e26ec" + }, "Swin2SRModel": { "tokenizer_classes": [], "processor_classes": [ @@ -6625,6 +6838,18 @@ ], "sha": "d71b13674b1a67443cd19d0594a3b5b1e5968f0d" }, + "WhisperForCausalLM": { + "tokenizer_classes": [ + "WhisperTokenizer" + ], + "processor_classes": [ + "WhisperFeatureExtractor" + ], + "model_classes": [ + "WhisperForCausalLM" + ], + "sha": "e7febfd7f4512e029293c677e6d2633e23fc459a" + }, "WhisperForConditionalGeneration": { "tokenizer_classes": [ "WhisperTokenizer",