From f295fc8a164b5882df94266a7f371c3158c930f5 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Tue, 25 Jul 2023 07:56:04 -0400 Subject: [PATCH] Fix last models for common tests that are too big. (#25058) * Fix last models for common tests that are too big. * Remove print statement --- .../perceiver/configuration_perceiver.py | 6 +++ .../models/perceiver/modeling_perceiver.py | 14 +++---- .../configuration_table_transformer.py | 15 ++++++- .../models/layoutlm/test_modeling_layoutlm.py | 4 -- .../layoutlmv2/test_modeling_layoutlmv2.py | 2 +- .../oneformer/test_modeling_oneformer.py | 18 ++++----- .../perceiver/test_modeling_perceiver.py | 9 +++-- .../segformer/test_modeling_segformer.py | 10 ++--- .../segformer/test_modeling_tf_segformer.py | 6 +-- .../models/speecht5/test_modeling_speecht5.py | 39 +++++++++---------- .../swiftformer/test_modeling_swiftformer.py | 10 ++--- .../test_modeling_table_transformer.py | 35 ++++++++++------- .../test_modeling_timm_backbone.py | 4 +- tests/models/tvlt/test_modeling_tvlt.py | 10 ++--- tests/models/upernet/test_modeling_upernet.py | 10 ++--- .../models/videomae/test_modeling_videomae.py | 8 ++-- tests/models/vit_mae/test_modeling_vit_mae.py | 8 ++-- tests/models/vivit/test_modeling_vivit.py | 8 +--- tests/test_modeling_common.py | 1 - 19 files changed, 109 insertions(+), 108 deletions(-) diff --git a/src/transformers/models/perceiver/configuration_perceiver.py b/src/transformers/models/perceiver/configuration_perceiver.py index 86f5268fed..182e92b8a3 100644 --- a/src/transformers/models/perceiver/configuration_perceiver.py +++ b/src/transformers/models/perceiver/configuration_perceiver.py @@ -97,6 +97,8 @@ class PerceiverConfig(PretrainedConfig): Number of audio samples per frame for the multimodal autoencoding model. samples_per_patch (`int`, *optional*, defaults to 16): Number of audio samples per patch when preprocessing the audio for the multimodal autoencoding model. 
+ output_num_channels (`int`, *optional*, defaults to 512): + Number of output channels for each modality decoder. output_shape (`List[int]`, *optional*, defaults to `[1, 16, 224, 224]`): Shape of the output (batch_size, num_frames, height, width) for the video decoder queries of the multimodal autoencoding model. This excludes the channel dimension. @@ -144,6 +146,8 @@ class PerceiverConfig(PretrainedConfig): audio_samples_per_frame=1920, samples_per_patch=16, output_shape=[1, 16, 224, 224], + output_num_channels=512, + _label_trainable_num_channels=1024, **kwargs, ): super().__init__(**kwargs) @@ -177,6 +181,8 @@ class PerceiverConfig(PretrainedConfig): self.audio_samples_per_frame = audio_samples_per_frame self.samples_per_patch = samples_per_patch self.output_shape = output_shape + self.output_num_channels = output_num_channels + self._label_trainable_num_channels = _label_trainable_num_channels class PerceiverOnnxConfig(OnnxConfig): diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py index 9d2e2fcf98..bb7ac2bc31 100755 --- a/src/transformers/models/perceiver/modeling_perceiver.py +++ b/src/transformers/models/perceiver/modeling_perceiver.py @@ -1830,7 +1830,7 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel): # Autoencoding, don't pass inputs to the queries. concat_preprocessed_input=False, output_shape=config.output_shape, - output_num_channels=512, + output_num_channels=config.output_num_channels, use_query_residual=False, position_encoding_only=True, position_encoding_type="fourier", @@ -1854,7 +1854,7 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel): # Autoencoding, don't pass inputs to the queries. 
concat_preprocessed_input=False, output_index_dims=(n_audio_samples // config.samples_per_patch,), - output_num_channels=512, + output_num_channels=config.output_num_channels, use_query_residual=False, position_encoding_only=True, position_encoding_type="fourier", @@ -1874,21 +1874,21 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel): position_encoding_only=True, position_encoding_type="trainable", trainable_position_encoding_kwargs={ - "num_channels": 1024, + "num_channels": config._label_trainable_num_channels, "index_dims": 1, }, ), }, num_outputs=None, - output_num_channels=512, + output_num_channels=config.output_num_channels, use_query_residual=False, ) output_postprocessor = PerceiverMultimodalPostprocessor( modalities={ - "audio": PerceiverAudioPostprocessor(config, in_channels=512), - "image": PerceiverProjectionPostprocessor(in_channels=512, out_channels=3), - "label": PerceiverClassificationPostprocessor(config, in_channels=512), + "audio": PerceiverAudioPostprocessor(config, in_channels=config.output_num_channels), + "image": PerceiverProjectionPostprocessor(in_channels=config.output_num_channels, out_channels=3), + "label": PerceiverClassificationPostprocessor(config, in_channels=config.output_num_channels), } ) diff --git a/src/transformers/models/table_transformer/configuration_table_transformer.py b/src/transformers/models/table_transformer/configuration_table_transformer.py index 94213008c6..250816ef49 100644 --- a/src/transformers/models/table_transformer/configuration_table_transformer.py +++ b/src/transformers/models/table_transformer/configuration_table_transformer.py @@ -13,9 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
""" Table Transformer model configuration""" - +import copy from collections import OrderedDict -from typing import Mapping +from typing import Dict, Mapping from packaging import version @@ -237,6 +237,17 @@ class TableTransformerConfig(PretrainedConfig): def hidden_size(self) -> int: return self.d_model + def to_dict(self) -> Dict[str, any]: + """ + Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`]. Returns: + `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance, + """ + output = copy.deepcopy(self.__dict__) + if output["backbone_config"] is not None: + output["backbone_config"] = self.backbone_config.to_dict() + output["model_type"] = self.__class__.model_type + return output + # Copied from transformers.models.detr.configuration_detr.DetrOnnxConfig class TableTransformerOnnxConfig(OnnxConfig): diff --git a/tests/models/layoutlm/test_modeling_layoutlm.py b/tests/models/layoutlm/test_modeling_layoutlm.py index 687d1ae4a5..0535fbf4e1 100644 --- a/tests/models/layoutlm/test_modeling_layoutlm.py +++ b/tests/models/layoutlm/test_modeling_layoutlm.py @@ -279,10 +279,6 @@ class LayoutLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_question_answering(*config_and_inputs) - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - def prepare_layoutlm_batch_inputs(): # Here we prepare a batch of 2 sequences to test a LayoutLM forward pass on: diff --git a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py index 4eda8952c3..c8457331c5 100644 --- a/tests/models/layoutlmv2/test_modeling_layoutlmv2.py +++ b/tests/models/layoutlmv2/test_modeling_layoutlmv2.py @@ -415,7 +415,7 @@ class LayoutLMv2ModelTest(ModelTesterMixin, 
PipelineTesterMixin, unittest.TestCa check_hidden_states_output(inputs_dict, config, model_class) - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") + @unittest.skip("We cannot configure detectron2 to output a smaller backbone") def test_model_is_small(self): pass diff --git a/tests/models/oneformer/test_modeling_oneformer.py b/tests/models/oneformer/test_modeling_oneformer.py index f23c0f265c..ef4a45021a 100644 --- a/tests/models/oneformer/test_modeling_oneformer.py +++ b/tests/models/oneformer/test_modeling_oneformer.py @@ -112,16 +112,20 @@ class OneFormerModelTester: config = OneFormerConfig( text_encoder_vocab_size=self.vocab_size, hidden_size=self.hidden_dim, + num_queries=self.num_queries, + num_labels=self.num_labels, + encoder_feedforward_dim=32, + dim_feedforward=64, + encoder_layers=2, + decoder_layers=2, ) - config.num_queries = self.num_queries - config.num_labels = self.num_labels - + config.backbone_config.embed_dim = 16 config.backbone_config.depths = [1, 1, 1, 1] + config.backbone_config.hidden_size = 16 config.backbone_config.num_channels = self.num_channels + config.backbone_config.num_heads = [1, 1, 2, 2] - config.encoder_feedforward_dim = 64 - config.dim_feedforward = 128 config.hidden_dim = self.hidden_dim config.mask_dim = self.hidden_dim config.conv_dim = self.hidden_dim @@ -309,10 +313,6 @@ class OneFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas expected_arg_names = ["pixel_values", "task_inputs"] self.assertListEqual(arg_names[:2], expected_arg_names) - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - @slow def test_model_from_pretrained(self): for model_name in ["shi-labs/oneformer_ade20k_swin_tiny"]: diff --git a/tests/models/perceiver/test_modeling_perceiver.py b/tests/models/perceiver/test_modeling_perceiver.py index 23bd75bdd1..91fac90e7b 100644 --- 
a/tests/models/perceiver/test_modeling_perceiver.py +++ b/tests/models/perceiver/test_modeling_perceiver.py @@ -79,6 +79,7 @@ class PerceiverModelTester: nchunks=20, num_latents=10, d_latents=20, + d_model=64, num_blocks=1, num_self_attends_per_block=2, num_self_attention_heads=1, @@ -108,6 +109,7 @@ class PerceiverModelTester: self.nchunks = nchunks self.num_latents = num_latents self.d_latents = d_latents + self.d_model = d_model self.num_blocks = num_blocks self.num_self_attends_per_block = num_self_attends_per_block self.num_self_attention_heads = num_self_attention_heads @@ -181,6 +183,7 @@ class PerceiverModelTester: return PerceiverConfig( num_latents=self.num_latents, d_latents=self.d_latents, + d_model=self.d_model, qk_channels=self.d_latents, v_channels=self.d_latents, num_blocks=self.num_blocks, @@ -200,6 +203,8 @@ class PerceiverModelTester: audio_samples_per_frame=self.audio_samples_per_frame, samples_per_patch=self.samples_per_patch, num_labels=self.num_labels, + output_num_channels=32, + _label_trainable_num_channels=16, ) def get_pipeline_config(self): @@ -784,10 +789,6 @@ class PerceiverModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas loss.backward() - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - @require_torch_multi_gpu @unittest.skip( reason=( diff --git a/tests/models/segformer/test_modeling_segformer.py b/tests/models/segformer/test_modeling_segformer.py index 7f19001148..0506be9b1f 100644 --- a/tests/models/segformer/test_modeling_segformer.py +++ b/tests/models/segformer/test_modeling_segformer.py @@ -61,11 +61,11 @@ class SegformerModelTester: image_size=64, num_channels=3, num_encoder_blocks=4, - depths=[2, 2, 2, 2], + depths=[1, 1, 1, 1], sr_ratios=[8, 4, 2, 1], - hidden_sizes=[16, 32, 64, 128], + hidden_sizes=[8, 8, 16, 16], downsampling_rates=[1, 4, 8, 16], - num_attention_heads=[1, 2, 4, 8], + num_attention_heads=[1, 1, 2, 2], 
is_training=True, use_labels=True, hidden_act="gelu", @@ -347,10 +347,6 @@ class SegformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas loss = model(**inputs).loss loss.backward() - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - @slow def test_model_from_pretrained(self): for model_name in SEGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: diff --git a/tests/models/segformer/test_modeling_tf_segformer.py b/tests/models/segformer/test_modeling_tf_segformer.py index b4dc657b79..aca621f509 100644 --- a/tests/models/segformer/test_modeling_tf_segformer.py +++ b/tests/models/segformer/test_modeling_tf_segformer.py @@ -58,11 +58,11 @@ class TFSegformerModelTester: image_size=64, num_channels=3, num_encoder_blocks=4, - depths=[2, 2, 2, 2], + depths=[1, 1, 1, 1], sr_ratios=[8, 4, 2, 1], - hidden_sizes=[16, 32, 64, 128], + hidden_sizes=[8, 8, 16, 16], downsampling_rates=[1, 4, 8, 16], - num_attention_heads=[1, 2, 4, 8], + num_attention_heads=[1, 1, 2, 2], is_training=True, use_labels=True, hidden_act="gelu", diff --git a/tests/models/speecht5/test_modeling_speecht5.py b/tests/models/speecht5/test_modeling_speecht5.py index fbd09ef500..c357259d78 100644 --- a/tests/models/speecht5/test_modeling_speecht5.py +++ b/tests/models/speecht5/test_modeling_speecht5.py @@ -238,10 +238,6 @@ class SpeechT5ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase # disabled because this model doesn't have decoder_input_ids pass - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - @require_torch class SpeechT5ForSpeechToTextTester: @@ -705,10 +701,6 @@ class SpeechT5ForSpeechToTextTest(ModelTesterMixin, unittest.TestCase): def test_training_gradient_checkpointing(self): pass - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def 
test_model_is_small(self): - pass - # overwrite from test_modeling_common def _mock_init_weights(self, module): if hasattr(module, "weight") and module.weight is not None: @@ -800,6 +792,9 @@ class SpeechT5ForTextToSpeechTester: vocab_size=81, num_mel_bins=20, reduction_factor=2, + speech_decoder_postnet_layers=2, + speech_decoder_postnet_units=32, + speech_decoder_prenet_units=32, ): self.parent = parent self.batch_size = batch_size @@ -813,6 +808,9 @@ class SpeechT5ForTextToSpeechTester: self.vocab_size = vocab_size self.num_mel_bins = num_mel_bins self.reduction_factor = reduction_factor + self.speech_decoder_postnet_layers = speech_decoder_postnet_layers + self.speech_decoder_postnet_units = speech_decoder_postnet_units + self.speech_decoder_prenet_units = speech_decoder_prenet_units def prepare_config_and_inputs(self): input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size).clamp(2) @@ -847,6 +845,9 @@ class SpeechT5ForTextToSpeechTester: vocab_size=self.vocab_size, num_mel_bins=self.num_mel_bins, reduction_factor=self.reduction_factor, + speech_decoder_postnet_layers=self.speech_decoder_postnet_layers, + speech_decoder_postnet_units=self.speech_decoder_postnet_units, + speech_decoder_prenet_units=self.speech_decoder_prenet_units, ) def create_and_check_model_forward(self, config, inputs_dict): @@ -996,10 +997,6 @@ class SpeechT5ForTextToSpeechTest(ModelTesterMixin, unittest.TestCase): if hasattr(module, "bias") and module.bias is not None: module.bias.data.fill_(3) - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - @require_torch @require_sentencepiece @@ -1046,6 +1043,9 @@ class SpeechT5ForSpeechToSpeechTester: vocab_size=81, num_mel_bins=20, reduction_factor=2, + speech_decoder_postnet_layers=2, + speech_decoder_postnet_units=32, + speech_decoder_prenet_units=32, ): self.parent = parent self.batch_size = batch_size @@ -1065,6 +1065,9 @@ class 
SpeechT5ForSpeechToSpeechTester: self.vocab_size = vocab_size self.num_mel_bins = num_mel_bins self.reduction_factor = reduction_factor + self.speech_decoder_postnet_layers = speech_decoder_postnet_layers + self.speech_decoder_postnet_units = speech_decoder_postnet_units + self.speech_decoder_prenet_units = speech_decoder_prenet_units def prepare_config_and_inputs(self): input_values = floats_tensor([self.batch_size, self.encoder_seq_length], scale=1.0) @@ -1105,6 +1108,9 @@ class SpeechT5ForSpeechToSpeechTester: vocab_size=self.vocab_size, num_mel_bins=self.num_mel_bins, reduction_factor=self.reduction_factor, + speech_decoder_postnet_layers=self.speech_decoder_postnet_layers, + speech_decoder_postnet_units=self.speech_decoder_postnet_units, + speech_decoder_prenet_units=self.speech_decoder_prenet_units, ) def create_and_check_model_forward(self, config, inputs_dict): @@ -1416,10 +1422,6 @@ class SpeechT5ForSpeechToSpeechTest(ModelTesterMixin, unittest.TestCase): if hasattr(module, "masked_spec_embed") and module.masked_spec_embed is not None: module.masked_spec_embed.data.fill_(3) - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - @require_torch @require_sentencepiece @@ -1478,6 +1480,7 @@ class SpeechT5HifiGanTester: def get_config(self): return SpeechT5HifiGanConfig( model_in_dim=self.num_mel_bins, + upsample_initial_channel=32, ) def create_and_check_model(self, config, input_values): @@ -1562,10 +1565,6 @@ class SpeechT5HifiGanTest(ModelTesterMixin, unittest.TestCase): def test_retain_grad_hidden_states_attentions(self): pass - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - # skip because it fails on automapping of SpeechT5HifiGanConfig def test_save_load_fast_init_from_base(self): pass diff --git a/tests/models/swiftformer/test_modeling_swiftformer.py 
b/tests/models/swiftformer/test_modeling_swiftformer.py index 151807c80c..3e286cc320 100644 --- a/tests/models/swiftformer/test_modeling_swiftformer.py +++ b/tests/models/swiftformer/test_modeling_swiftformer.py @@ -58,9 +58,9 @@ class SwiftFormerModelTester: hidden_dropout_prob=0.1, attention_probs_dropout_prob=0.1, image_size=224, - num_labels=1000, - layer_depths=[3, 3, 6, 4], - embed_dims=[48, 56, 112, 220], + num_labels=3, + layer_depths=[1, 1, 1, 1], + embed_dims=[16, 16, 32, 32], ): self.parent = parent self.batch_size = batch_size @@ -272,10 +272,6 @@ class SwiftFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - # We will verify our results on an image of cute cats def prepare_img(): diff --git a/tests/models/table_transformer/test_modeling_table_transformer.py b/tests/models/table_transformer/test_modeling_table_transformer.py index 0df8da45cb..d81c52ff13 100644 --- a/tests/models/table_transformer/test_modeling_table_transformer.py +++ b/tests/models/table_transformer/test_modeling_table_transformer.py @@ -21,8 +21,8 @@ import unittest from huggingface_hub import hf_hub_download -from transformers import TableTransformerConfig, is_timm_available, is_vision_available -from transformers.testing_utils import require_timm, require_vision, slow, torch_device +from transformers import ResNetConfig, TableTransformerConfig, is_torch_available, is_vision_available +from transformers.testing_utils import require_timm, require_torch, require_vision, slow, torch_device from ...generation.test_utils import GenerationTesterMixin from ...test_configuration_common import ConfigTester @@ -30,10 +30,10 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_ from ...test_pipeline_mixin import PipelineTesterMixin 
-if is_timm_available(): +if is_torch_available(): import torch - from transformers import ResNetConfig, TableTransformerForObjectDetection, TableTransformerModel + from transformers import TableTransformerForObjectDetection, TableTransformerModel if is_vision_available(): @@ -49,7 +49,7 @@ class TableTransformerModelTester: batch_size=8, is_training=True, use_labels=True, - hidden_size=256, + hidden_size=32, num_hidden_layers=2, num_attention_heads=8, intermediate_size=4, @@ -61,7 +61,7 @@ class TableTransformerModelTester: min_size=200, max_size=200, n_targets=8, - num_labels=91, + num_labels=3, ): self.parent = parent self.batch_size = batch_size @@ -107,6 +107,16 @@ class TableTransformerModelTester: return config, pixel_values, pixel_mask, labels def get_config(self): + resnet_config = ResNetConfig( + num_channels=3, + embeddings_size=10, + hidden_sizes=[10, 20, 30, 40], + depths=[1, 1, 2, 1], + hidden_act="relu", + num_labels=3, + out_features=["stage2", "stage3", "stage4"], + out_indices=[2, 3, 4], + ) return TableTransformerConfig( d_model=self.hidden_size, encoder_layers=self.num_hidden_layers, @@ -119,6 +129,8 @@ class TableTransformerModelTester: attention_dropout=self.attention_probs_dropout_prob, num_queries=self.num_queries, num_labels=self.num_labels, + use_timm_backbone=False, + backbone_config=resnet_config, ) def prepare_config_and_inputs_for_common(self): @@ -175,19 +187,19 @@ class TableTransformerModelTester: self.parent.assertEqual(result.pred_boxes.shape, (self.batch_size, self.num_queries, 4)) -@require_timm +@require_torch class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase): all_model_classes = ( ( TableTransformerModel, TableTransformerForObjectDetection, ) - if is_timm_available() + if is_torch_available() else () ) pipeline_model_mapping = ( {"feature-extraction": TableTransformerModel, "object-detection": TableTransformerForObjectDetection} - if is_timm_available() + if 
is_torch_available() else {} ) is_encoder_decoder = True @@ -453,6 +465,7 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, Pipelin # let's set num_channels to 1 config.num_channels = 1 + config.backbone_config.num_channels = 1 for model_class in self.all_model_classes: model = model_class(config) @@ -486,10 +499,6 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, Pipelin msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - TOLERANCE = 1e-4 diff --git a/tests/models/timm_backbone/test_modeling_timm_backbone.py b/tests/models/timm_backbone/test_modeling_timm_backbone.py index c134a588b6..eaa022422c 100644 --- a/tests/models/timm_backbone/test_modeling_timm_backbone.py +++ b/tests/models/timm_backbone/test_modeling_timm_backbone.py @@ -42,7 +42,7 @@ class TimmBackboneModelTester: out_indices=None, out_features=None, stage_names=None, - backbone="resnet50", + backbone="resnet18", batch_size=3, image_size=32, num_channels=3, @@ -196,7 +196,7 @@ class TimmBackboneModelTest(ModelTesterMixin, BackboneTesterMixin, PipelineTeste def test_can_use_safetensors(self): pass - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") + @unittest.skip("Need to use a timm backbone and there is no tiny model available.") def test_model_is_small(self): pass diff --git a/tests/models/tvlt/test_modeling_tvlt.py b/tests/models/tvlt/test_modeling_tvlt.py index 41eefc9eb7..e437b2651e 100644 --- a/tests/models/tvlt/test_modeling_tvlt.py +++ b/tests/models/tvlt/test_modeling_tvlt.py @@ -67,8 +67,8 @@ class TvltModelTester: num_image_channels=3, num_audio_channels=1, num_frames=2, - hidden_size=128, - num_hidden_layers=12, + hidden_size=32, + num_hidden_layers=3, num_attention_heads=4, intermediate_size=128, hidden_act="gelu", @@ -79,7 +79,7 
@@ class TvltModelTester: qkv_bias=True, use_mean_pooling=True, decoder_num_attention_heads=4, - decoder_hidden_size=64, + decoder_hidden_size=32, decoder_num_hidden_layers=2, decoder_intermediate_size=128, image_mask_ratio=0.75, @@ -542,10 +542,6 @@ class TvltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): check_hidden_states_output(inputs_dict, config, model_class) - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - # We will verify our results on a video of eating spaghetti # Frame indices used: [164 168 172 176 181 185 189 193 198 202 206 210 215 219 223 227] diff --git a/tests/models/upernet/test_modeling_upernet.py b/tests/models/upernet/test_modeling_upernet.py index 97ba37f8be..84c32f7233 100644 --- a/tests/models/upernet/test_modeling_upernet.py +++ b/tests/models/upernet/test_modeling_upernet.py @@ -51,7 +51,7 @@ class UperNetModelTester: num_channels=3, num_stages=4, hidden_sizes=[10, 20, 30, 40], - depths=[2, 2, 3, 2], + depths=[1, 1, 1, 1], is_training=True, use_labels=True, intermediate_size=37, @@ -106,12 +106,12 @@ class UperNetModelTester: def get_config(self): return UperNetConfig( backbone_config=self.get_backbone_config(), - hidden_size=512, + hidden_size=64, pool_scales=[1, 2, 3, 6], use_auxiliary_head=True, auxiliary_loss_weight=0.4, auxiliary_in_channels=40, - auxiliary_channels=256, + auxiliary_channels=32, auxiliary_num_convs=1, auxiliary_concat_input=False, loss_ignore_index=255, @@ -207,10 +207,6 @@ class UperNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) def test_multi_gpu_data_parallel_forward(self): pass - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - def test_hidden_states_output(self): def check_hidden_states_output(inputs_dict, config, model_class): model = model_class(config) diff --git 
a/tests/models/videomae/test_modeling_videomae.py b/tests/models/videomae/test_modeling_videomae.py index 6f2e7fa31e..85a0d2714e 100644 --- a/tests/models/videomae/test_modeling_videomae.py +++ b/tests/models/videomae/test_modeling_videomae.py @@ -130,6 +130,10 @@ class VideoMAEModelTester: attention_probs_dropout_prob=self.attention_probs_dropout_prob, is_decoder=False, initializer_range=self.initializer_range, + decoder_hidden_size=self.hidden_size, + decoder_intermediate_size=self.intermediate_size, + decoder_num_attention_heads=self.num_attention_heads, + decoder_num_hidden_layers=self.num_hidden_layers, ) def create_and_check_model(self, config, pixel_values, labels): @@ -344,10 +348,6 @@ class VideoMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase check_hidden_states_output(inputs_dict, config, model_class) - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - # We will verify our results on a video of eating spaghetti # Frame indices used: [164 168 172 176 181 185 189 193 198 202 206 210 215 219 223 227] diff --git a/tests/models/vit_mae/test_modeling_vit_mae.py b/tests/models/vit_mae/test_modeling_vit_mae.py index bb50cb9606..3cedb0c176 100644 --- a/tests/models/vit_mae/test_modeling_vit_mae.py +++ b/tests/models/vit_mae/test_modeling_vit_mae.py @@ -118,6 +118,10 @@ class ViTMAEModelTester: is_decoder=False, initializer_range=self.initializer_range, mask_ratio=self.mask_ratio, + decoder_hidden_size=self.hidden_size, + decoder_intermediate_size=self.intermediate_size, + decoder_num_attention_heads=self.num_attention_heads, + decoder_num_hidden_layers=self.num_hidden_layers, ) def create_and_check_model(self, config, pixel_values, labels): @@ -279,10 +283,6 @@ class ViTMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): def test_model_outputs_equivalence(self): pass - @unittest.skip("Will be fixed soon by reducing the size of the model 
used for common tests.") - def test_model_is_small(self): - pass - @slow def test_model_from_pretrained(self): for model_name in VIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: diff --git a/tests/models/vivit/test_modeling_vivit.py b/tests/models/vivit/test_modeling_vivit.py index d7d72eca7e..43db8bad7b 100644 --- a/tests/models/vivit/test_modeling_vivit.py +++ b/tests/models/vivit/test_modeling_vivit.py @@ -55,8 +55,8 @@ class VivitModelTester: num_frames=8, # decreased, because default 32 takes too much RAM at inference tubelet_size=[2, 4, 4], num_channels=3, - hidden_size=768, - num_hidden_layers=5, + hidden_size=32, + num_hidden_layers=2, num_attention_heads=4, intermediate_size=37, hidden_act="gelu_fast", @@ -310,10 +310,6 @@ class VivitModelTest(ModelTesterMixin, unittest.TestCase): check_hidden_states_output(inputs_dict, config, model_class) - @unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.") - def test_model_is_small(self): - pass - # We will verify our results on a video of eating spaghetti # Frame indices used: [164 168 172 176 181 185 189 193 198 202 206 210 215 219 223 227] diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py index 0d5080ec5a..87960983f9 100755 --- a/tests/test_modeling_common.py +++ b/tests/test_modeling_common.py @@ -2708,7 +2708,6 @@ class ModelTesterMixin: def test_model_is_small(self): # Just a consistency check to make sure we are not running tests on 80M parameter models. config, _ = self.model_tester.prepare_config_and_inputs_for_common() - # print(config) for model_class in self.all_model_classes: model = model_class(config)