From d9e4bc2895a818d7fb339254c07ce44b201d66d3 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Mon, 25 Sep 2023 18:08:12 +0200 Subject: [PATCH] Update tiny model information and pipeline tests (#26285) * Update tiny model summary file * add to pipeline tests * revert * fix import * fix import * fix * fix * update * update * update * fix * remove BarkModelTest * fix --------- Co-authored-by: ydshieh --- .../models/auto/modeling_tf_auto.py | 1 + tests/models/bark/test_modeling_bark.py | 23 --- tests/models/blip_2/test_modeling_blip_2.py | 6 +- tests/models/bros/test_modeling_bros.py | 15 +- tests/models/idefics/test_modeling_idefics.py | 2 +- .../pop2piano/test_modeling_pop2piano.py | 6 +- tests/models/vits/test_modeling_vits.py | 4 +- .../pipelines/test_pipelines_text_to_audio.py | 4 +- tests/utils/tiny_model_summary.json | 143 +++++++++++++++++- 9 files changed, 174 insertions(+), 30 deletions(-) diff --git a/src/transformers/models/auto/modeling_tf_auto.py b/src/transformers/models/auto/modeling_tf_auto.py index d2068a4511..b334dd3091 100644 --- a/src/transformers/models/auto/modeling_tf_auto.py +++ b/src/transformers/models/auto/modeling_tf_auto.py @@ -362,6 +362,7 @@ TF_MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES = OrderedDict([("wav2vec2", "TFW TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = OrderedDict( [ ("layoutlm", "TFLayoutLMForQuestionAnswering"), + ("layoutlmv3", "TFLayoutLMv3ForQuestionAnswering"), ] ) diff --git a/tests/models/bark/test_modeling_bark.py b/tests/models/bark/test_modeling_bark.py index 6fc4cb58a6..3a5de30147 100644 --- a/tests/models/bark/test_modeling_bark.py +++ b/tests/models/bark/test_modeling_bark.py @@ -493,13 +493,6 @@ class BarkModelTester: self.is_training = is_training - def prepare_config_and_inputs(self): - # TODO: @Yoach: Preapre `inputs_dict` - inputs_dict = {} - config = self.get_config() - - return config, inputs_dict - def get_config(self): return BarkConfig.from_sub_model_configs( self.semantic_model_tester.get_config(), @@ -522,22 +515,6 @@ class BarkModelTester: return config - def prepare_config_and_inputs_for_common(self): - # TODO: @Yoach - pass - # return config, inputs_dict - - -# Need this class in oder to create tiny model for `bark` -# TODO (@Yoach) Implement actual test methods -@unittest.skip("So far all tests will fail.") -class BarkModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase): - all_model_classes = (BarkModel,) if is_torch_available() else () - - def setUp(self): - self.model_tester = BarkModelTester(self) - self.config_tester = ConfigTester(self, config_class=BarkConfig, n_embd=37) - @require_torch class BarkSemanticModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase): diff --git a/tests/models/blip_2/test_modeling_blip_2.py b/tests/models/blip_2/test_modeling_blip_2.py index c5bdb70791..66d59465a7 100644 --- a/tests/models/blip_2/test_modeling_blip_2.py +++ b/tests/models/blip_2/test_modeling_blip_2.py @@ -666,7 +666,11 @@ class Blip2ModelTester: class Blip2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = (Blip2ForConditionalGeneration, Blip2Model) if is_torch_available() else () pipeline_model_mapping = ( - {"feature-extraction": Blip2Model, "image-to-text": Blip2ForConditionalGeneration} + { + "feature-extraction": Blip2Model, + "image-to-text": Blip2ForConditionalGeneration, + "visual-question-answering": Blip2ForConditionalGeneration, + } if is_torch_available() else {} ) diff --git a/tests/models/bros/test_modeling_bros.py b/tests/models/bros/test_modeling_bros.py index 0820aeb074..c4fbaa2f98 100644 --- a/tests/models/bros/test_modeling_bros.py +++ b/tests/models/bros/test_modeling_bros.py @@ -22,6 +22,7 @@ from transformers.utils import is_torch_available from ...test_configuration_common import ConfigTester from ...test_modeling_common import ModelTesterMixin, ids_tensor, random_attention_mask +from ...test_pipeline_mixin import PipelineTesterMixin if is_torch_available(): @@ -272,7 +273,7 @@ class BrosModelTester: @require_torch -class BrosModelTest(ModelTesterMixin, unittest.TestCase): +class BrosModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): test_pruning = False test_torchscript = False test_mismatched_shapes = False @@ -288,6 +289,18 @@ class BrosModelTest(ModelTesterMixin, unittest.TestCase): else () ) all_generative_model_classes = () if is_torch_available() else () + pipeline_model_mapping = ( + {"feature-extraction": BrosModel, "token-classification": BrosForTokenClassification} + if is_torch_available() + else {} + ) + + # BROS requires `bbox` in the inputs which doesn't fit into the above 2 pipelines' input formats. + # see https://github.com/huggingface/transformers/pull/26294 + def is_pipeline_test_to_skip( + self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name + ): + return True def setUp(self): self.model_tester = BrosModelTester(self) diff --git a/tests/models/idefics/test_modeling_idefics.py b/tests/models/idefics/test_modeling_idefics.py index c6df84b11f..922e803726 100644 --- a/tests/models/idefics/test_modeling_idefics.py +++ b/tests/models/idefics/test_modeling_idefics.py @@ -260,7 +260,7 @@ class IdeficsModelTester: @require_torch class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = (IdeficsModel, IdeficsForVisionText2Text) if is_torch_available() else () - pipeline_model_mapping = {} + pipeline_model_mapping = {"feature-extraction": IdeficsModel} if is_torch_available() else {} test_pruning = False test_headmasking = False test_torchscript = False diff --git a/tests/models/pop2piano/test_modeling_pop2piano.py b/tests/models/pop2piano/test_modeling_pop2piano.py index 6347c85071..d19ddc10e1 100644 --- a/tests/models/pop2piano/test_modeling_pop2piano.py +++ b/tests/models/pop2piano/test_modeling_pop2piano.py @@ -37,6 +37,7 @@ from transformers.utils import is_essentia_available, is_librosa_available, is_s from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester from ...test_modeling_common import ModelTesterMixin, ids_tensor +from ...test_pipeline_mixin import PipelineTesterMixin if is_torch_available(): @@ -509,9 +510,12 @@ class Pop2PianoModelTester: @require_torch -class Pop2PianoModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase): +class Pop2PianoModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = (Pop2PianoForConditionalGeneration,) if is_torch_available() else () all_generative_model_classes = () + pipeline_model_mapping = ( + {"automatic-speech-recognition": Pop2PianoForConditionalGeneration} if is_torch_available() else {} + ) all_parallelizable_model_classes = () fx_compatible = False test_pruning = False diff --git a/tests/models/vits/test_modeling_vits.py b/tests/models/vits/test_modeling_vits.py index 459a5587cf..e781a69c0b 100644 --- a/tests/models/vits/test_modeling_vits.py +++ b/tests/models/vits/test_modeling_vits.py @@ -156,7 +156,9 @@ class VitsModelTester: @require_torch class VitsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = (VitsModel,) if is_torch_available() else () - pipeline_model_mapping = {"text-to-audio": VitsModel} if is_torch_available() else {} + pipeline_model_mapping = ( + {"feature-extraction": VitsModel, "text-to-audio": VitsModel} if is_torch_available() else {} + ) is_encoder_decoder = False test_pruning = False test_headmasking = False diff --git a/tests/pipelines/test_pipelines_text_to_audio.py b/tests/pipelines/test_pipelines_text_to_audio.py index 4a42122ce6..04acd8fdf8 100644 --- a/tests/pipelines/test_pipelines_text_to_audio.py +++ b/tests/pipelines/test_pipelines_text_to_audio.py @@ -181,7 +181,9 @@ class TextToAudioPipelineTests(unittest.TestCase): outputs = speech_generator("This is a test") self.assertEqual(ANY(np.ndarray), outputs["audio"]) - forward_params = {"num_return_sequences": 2, "do_sample": True} + forward_params = ( + {"num_return_sequences": 2, "do_sample": True} if speech_generator.model.can_generate() else {} + ) outputs = speech_generator(["This is great !", "Something else"], forward_params=forward_params) audio = [output["audio"] for output in outputs] self.assertEqual([ANY(np.ndarray), ANY(np.ndarray)], audio) diff --git a/tests/utils/tiny_model_summary.json b/tests/utils/tiny_model_summary.json index b7fdf87bac..e084780268 100644 --- a/tests/utils/tiny_model_summary.json +++ b/tests/utils/tiny_model_summary.json @@ -128,6 +128,17 @@ ], "sha": "3106af0fd503970717c05f27218e5cacf19ba872" }, + "BarkModel": { + "tokenizer_classes": [ + "BertTokenizer", + "BertTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "BarkModel" + ], + "sha": "187e590fd87359cea47693e8cb11a604cd7b673c" + }, "BartForCausalLM": { "tokenizer_classes": [ "BartTokenizer", @@ -708,6 +719,28 @@ ], "sha": "28b600fcfdc4f4938406fb518abf895620048cb2" }, + "BrosForTokenClassification": { + "tokenizer_classes": [ + "BertTokenizer", + "BertTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "BrosForTokenClassification" + ], + "sha": "4ec2c91936f96b93667e8946fc7abbdeeb08a6d7" + }, + "BrosModel": { + "tokenizer_classes": [ + "BertTokenizer", + "BertTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "BrosModel" + ], + "sha": "e2464830b1874eeaf9f4b425fbe0ce8e7c7643e9" + }, "CLIPModel": { "tokenizer_classes": [ "CLIPTokenizer", @@ -1323,7 +1356,8 @@ ], "processor_classes": [], "model_classes": [ - "DebertaV2ForMultipleChoice" + "DebertaV2ForMultipleChoice", + "TFDebertaV2ForMultipleChoice" ], "sha": "07e39f520ce239b39ef8cb24cd7874d06c791063" }, @@ -1519,6 +1553,16 @@ ], "sha": "d6c75bc51196f0a683afb12de6310fdda13efefd" }, + "Dinov2Backbone": { + "tokenizer_classes": [], + "processor_classes": [ + "BitImageProcessor" + ], + "model_classes": [ + "Dinov2Backbone" + ], + "sha": "dbf8d2ff3092ac53c11e6525e6cbae7ace84769a" + }, "Dinov2ForImageClassification": { "tokenizer_classes": [], "processor_classes": [ @@ -2768,6 +2812,30 @@ ], "sha": "6749164c678d4883d455f98b1dfc98c62da8f08b" }, + "IdeficsForVisionText2Text": { + "tokenizer_classes": [ + "LlamaTokenizerFast" + ], + "processor_classes": [ + "IdeficsImageProcessor" + ], + "model_classes": [ + "IdeficsForVisionText2Text" + ], + "sha": "2c2f2e2cd6b02a77d0cdd8c3767ba9a6267dbd20" + }, + "IdeficsModel": { + "tokenizer_classes": [ + "LlamaTokenizerFast" + ], + "processor_classes": [ + "IdeficsImageProcessor" + ], + "model_classes": [ + "IdeficsModel" + ], + "sha": "649df2e35e067efd573ff2d083784a5cf876545e" + }, "ImageGPTForCausalImageModeling": { "tokenizer_classes": [], "processor_classes": [ @@ -4077,6 +4145,24 @@ ], "sha": "315f34f30bcc4b0b66b11987726df2a80c50e271" }, + "MusicgenForCausalLM": { + "tokenizer_classes": [ + "T5TokenizerFast" + ], + "processor_classes": [], + "model_classes": [], + "sha": "37e9ae5dafb601daa8364e9ac17da31cd82b274b" + }, + "MusicgenForConditionalGeneration": { + "tokenizer_classes": [ + "T5TokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "MusicgenForConditionalGeneration" + ], + "sha": "b71611b88832e53370e676da53b65042f7fc78ee" + }, "MvpForCausalLM": { "tokenizer_classes": [ "MvpTokenizer", @@ -4641,6 +4727,39 @@ ], "sha": "83ec4d2d61ed62525ee033e13d144817beb29d19" }, + "PersimmonForCausalLM": { + "tokenizer_classes": [ + "LlamaTokenizer", + "LlamaTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "PersimmonForCausalLM" + ], + "sha": "454234d6496c3857f5bf3eafb784616e2cd3ea82" + }, + "PersimmonForSequenceClassification": { + "tokenizer_classes": [ + "LlamaTokenizer", + "LlamaTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "PersimmonForSequenceClassification" + ], + "sha": "1d2674846543a181ca67bafa8b8f3a48bd2eefd1" + }, + "PersimmonModel": { + "tokenizer_classes": [ + "LlamaTokenizer", + "LlamaTokenizerFast" + ], + "processor_classes": [], + "model_classes": [ + "PersimmonModel" + ], + "sha": "b8c8d479e29e9ee048e2d0b05b001ac835ad8859" + }, "Pix2StructForConditionalGeneration": { "tokenizer_classes": [ "T5TokenizerFast" @@ -5432,6 +5551,18 @@ ], "sha": "d46f0a83324e5865420a27a738ef203292de3479" }, + "SpeechT5ForTextToSpeech": { + "tokenizer_classes": [ + "SpeechT5Tokenizer" + ], + "processor_classes": [ + "SpeechT5FeatureExtractor" + ], + "model_classes": [ + "SpeechT5ForTextToSpeech" + ], + "sha": "922e748d9e1ea256a8d9259782021cd3820d5924" + }, "SpeechT5Model": { "tokenizer_classes": [ "SpeechT5Tokenizer" @@ -6254,6 +6385,16 @@ ], "sha": "85020189fb7bf1217eb9370b09bca8ec5bcfdafa" }, + "VitsModel": { + "tokenizer_classes": [ + "VitsTokenizer" + ], + "processor_classes": [], + "model_classes": [ + "VitsModel" + ], + "sha": "b9a20ca5b6a7874576e485850260578895587dd2" + }, "Wav2Vec2ConformerForAudioFrameClassification": { "tokenizer_classes": [ "Wav2Vec2CTCTokenizer"