Fix last models for common tests that are too big. (#25058)
* Fix last models for common tests that are too big. * Remove print statement
This commit is contained in:
@@ -97,6 +97,8 @@ class PerceiverConfig(PretrainedConfig):
|
||||
Number of audio samples per frame for the multimodal autoencoding model.
|
||||
samples_per_patch (`int`, *optional*, defaults to 16):
|
||||
Number of audio samples per patch when preprocessing the audio for the multimodal autoencoding model.
|
||||
output_num_channels (`int`, *optional*, defaults to 512):
|
||||
Number of output channels for each modality decoder.
|
||||
output_shape (`List[int]`, *optional*, defaults to `[1, 16, 224, 224]`):
|
||||
Shape of the output (batch_size, num_frames, height, width) for the video decoder queries of the multimodal
|
||||
autoencoding model. This excludes the channel dimension.
|
||||
@@ -144,6 +146,8 @@ class PerceiverConfig(PretrainedConfig):
|
||||
audio_samples_per_frame=1920,
|
||||
samples_per_patch=16,
|
||||
output_shape=[1, 16, 224, 224],
|
||||
output_num_channels=512,
|
||||
_label_trainable_num_channels=1024,
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(**kwargs)
|
||||
@@ -177,6 +181,8 @@ class PerceiverConfig(PretrainedConfig):
|
||||
self.audio_samples_per_frame = audio_samples_per_frame
|
||||
self.samples_per_patch = samples_per_patch
|
||||
self.output_shape = output_shape
|
||||
self.output_num_channels = output_num_channels
|
||||
self._label_trainable_num_channels = _label_trainable_num_channels
|
||||
|
||||
|
||||
class PerceiverOnnxConfig(OnnxConfig):
|
||||
|
||||
@@ -1830,7 +1830,7 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel):
|
||||
# Autoencoding, don't pass inputs to the queries.
|
||||
concat_preprocessed_input=False,
|
||||
output_shape=config.output_shape,
|
||||
output_num_channels=512,
|
||||
output_num_channels=config.output_num_channels,
|
||||
use_query_residual=False,
|
||||
position_encoding_only=True,
|
||||
position_encoding_type="fourier",
|
||||
@@ -1854,7 +1854,7 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel):
|
||||
# Autoencoding, don't pass inputs to the queries.
|
||||
concat_preprocessed_input=False,
|
||||
output_index_dims=(n_audio_samples // config.samples_per_patch,),
|
||||
output_num_channels=512,
|
||||
output_num_channels=config.output_num_channels,
|
||||
use_query_residual=False,
|
||||
position_encoding_only=True,
|
||||
position_encoding_type="fourier",
|
||||
@@ -1874,21 +1874,21 @@ class PerceiverForMultimodalAutoencoding(PerceiverPreTrainedModel):
|
||||
position_encoding_only=True,
|
||||
position_encoding_type="trainable",
|
||||
trainable_position_encoding_kwargs={
|
||||
"num_channels": 1024,
|
||||
"num_channels": config._label_trainable_num_channels,
|
||||
"index_dims": 1,
|
||||
},
|
||||
),
|
||||
},
|
||||
num_outputs=None,
|
||||
output_num_channels=512,
|
||||
output_num_channels=config.output_num_channels,
|
||||
use_query_residual=False,
|
||||
)
|
||||
|
||||
output_postprocessor = PerceiverMultimodalPostprocessor(
|
||||
modalities={
|
||||
"audio": PerceiverAudioPostprocessor(config, in_channels=512),
|
||||
"image": PerceiverProjectionPostprocessor(in_channels=512, out_channels=3),
|
||||
"label": PerceiverClassificationPostprocessor(config, in_channels=512),
|
||||
"audio": PerceiverAudioPostprocessor(config, in_channels=config.output_num_channels),
|
||||
"image": PerceiverProjectionPostprocessor(in_channels=config.output_num_channels, out_channels=3),
|
||||
"label": PerceiverClassificationPostprocessor(config, in_channels=config.output_num_channels),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -13,9 +13,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
""" Table Transformer model configuration"""
|
||||
|
||||
import copy
|
||||
from collections import OrderedDict
|
||||
from typing import Mapping
|
||||
from typing import Dict, Mapping
|
||||
|
||||
from packaging import version
|
||||
|
||||
@@ -237,6 +237,17 @@ class TableTransformerConfig(PretrainedConfig):
|
||||
def hidden_size(self) -> int:
|
||||
return self.d_model
|
||||
|
||||
def to_dict(self) -> Dict[str, any]:
|
||||
"""
|
||||
Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`]. Returns:
|
||||
`Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance.
|
||||
"""
|
||||
output = copy.deepcopy(self.__dict__)
|
||||
if output["backbone_config"] is not None:
|
||||
output["backbone_config"] = self.backbone_config.to_dict()
|
||||
output["model_type"] = self.__class__.model_type
|
||||
return output
|
||||
|
||||
|
||||
# Copied from transformers.models.detr.configuration_detr.DetrOnnxConfig
|
||||
class TableTransformerOnnxConfig(OnnxConfig):
|
||||
|
||||
@@ -279,10 +279,6 @@ class LayoutLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
|
||||
config_and_inputs = self.model_tester.prepare_config_and_inputs()
|
||||
self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
|
||||
def prepare_layoutlm_batch_inputs():
|
||||
# Here we prepare a batch of 2 sequences to test a LayoutLM forward pass on:
|
||||
|
||||
@@ -415,7 +415,7 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
|
||||
|
||||
check_hidden_states_output(inputs_dict, config, model_class)
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
@unittest.skip("We cannot configure detectron2 to output a smaller backbone")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
|
||||
@@ -112,16 +112,20 @@ class OneFormerModelTester:
|
||||
config = OneFormerConfig(
|
||||
text_encoder_vocab_size=self.vocab_size,
|
||||
hidden_size=self.hidden_dim,
|
||||
num_queries=self.num_queries,
|
||||
num_labels=self.num_labels,
|
||||
encoder_feedforward_dim=32,
|
||||
dim_feedforward=64,
|
||||
encoder_layers=2,
|
||||
decoder_layers=2,
|
||||
)
|
||||
|
||||
config.num_queries = self.num_queries
|
||||
config.num_labels = self.num_labels
|
||||
|
||||
config.backbone_config.embed_dim = 16
|
||||
config.backbone_config.depths = [1, 1, 1, 1]
|
||||
config.backbone_config.hidden_size = 16
|
||||
config.backbone_config.num_channels = self.num_channels
|
||||
config.backbone_config.num_heads = [1, 1, 2, 2]
|
||||
|
||||
config.encoder_feedforward_dim = 64
|
||||
config.dim_feedforward = 128
|
||||
config.hidden_dim = self.hidden_dim
|
||||
config.mask_dim = self.hidden_dim
|
||||
config.conv_dim = self.hidden_dim
|
||||
@@ -309,10 +313,6 @@ class OneFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
|
||||
expected_arg_names = ["pixel_values", "task_inputs"]
|
||||
self.assertListEqual(arg_names[:2], expected_arg_names)
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in ["shi-labs/oneformer_ade20k_swin_tiny"]:
|
||||
|
||||
@@ -79,6 +79,7 @@ class PerceiverModelTester:
|
||||
nchunks=20,
|
||||
num_latents=10,
|
||||
d_latents=20,
|
||||
d_model=64,
|
||||
num_blocks=1,
|
||||
num_self_attends_per_block=2,
|
||||
num_self_attention_heads=1,
|
||||
@@ -108,6 +109,7 @@ class PerceiverModelTester:
|
||||
self.nchunks = nchunks
|
||||
self.num_latents = num_latents
|
||||
self.d_latents = d_latents
|
||||
self.d_model = d_model
|
||||
self.num_blocks = num_blocks
|
||||
self.num_self_attends_per_block = num_self_attends_per_block
|
||||
self.num_self_attention_heads = num_self_attention_heads
|
||||
@@ -181,6 +183,7 @@ class PerceiverModelTester:
|
||||
return PerceiverConfig(
|
||||
num_latents=self.num_latents,
|
||||
d_latents=self.d_latents,
|
||||
d_model=self.d_model,
|
||||
qk_channels=self.d_latents,
|
||||
v_channels=self.d_latents,
|
||||
num_blocks=self.num_blocks,
|
||||
@@ -200,6 +203,8 @@ class PerceiverModelTester:
|
||||
audio_samples_per_frame=self.audio_samples_per_frame,
|
||||
samples_per_patch=self.samples_per_patch,
|
||||
num_labels=self.num_labels,
|
||||
output_num_channels=32,
|
||||
_label_trainable_num_channels=16,
|
||||
)
|
||||
|
||||
def get_pipeline_config(self):
|
||||
@@ -784,10 +789,6 @@ class PerceiverModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
|
||||
|
||||
loss.backward()
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
@require_torch_multi_gpu
|
||||
@unittest.skip(
|
||||
reason=(
|
||||
|
||||
@@ -61,11 +61,11 @@ class SegformerModelTester:
|
||||
image_size=64,
|
||||
num_channels=3,
|
||||
num_encoder_blocks=4,
|
||||
depths=[2, 2, 2, 2],
|
||||
depths=[1, 1, 1, 1],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
hidden_sizes=[16, 32, 64, 128],
|
||||
hidden_sizes=[8, 8, 16, 16],
|
||||
downsampling_rates=[1, 4, 8, 16],
|
||||
num_attention_heads=[1, 2, 4, 8],
|
||||
num_attention_heads=[1, 1, 2, 2],
|
||||
is_training=True,
|
||||
use_labels=True,
|
||||
hidden_act="gelu",
|
||||
@@ -347,10 +347,6 @@ class SegformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
|
||||
loss = model(**inputs).loss
|
||||
loss.backward()
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in SEGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
|
||||
@@ -58,11 +58,11 @@ class TFSegformerModelTester:
|
||||
image_size=64,
|
||||
num_channels=3,
|
||||
num_encoder_blocks=4,
|
||||
depths=[2, 2, 2, 2],
|
||||
depths=[1, 1, 1, 1],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
hidden_sizes=[16, 32, 64, 128],
|
||||
hidden_sizes=[8, 8, 16, 16],
|
||||
downsampling_rates=[1, 4, 8, 16],
|
||||
num_attention_heads=[1, 2, 4, 8],
|
||||
num_attention_heads=[1, 1, 2, 2],
|
||||
is_training=True,
|
||||
use_labels=True,
|
||||
hidden_act="gelu",
|
||||
|
||||
@@ -238,10 +238,6 @@ class SpeechT5ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
|
||||
# disabled because this model doesn't have decoder_input_ids
|
||||
pass
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
|
||||
@require_torch
|
||||
class SpeechT5ForSpeechToTextTester:
|
||||
@@ -705,10 +701,6 @@ class SpeechT5ForSpeechToTextTest(ModelTesterMixin, unittest.TestCase):
|
||||
def test_training_gradient_checkpointing(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
# overwrite from test_modeling_common
|
||||
def _mock_init_weights(self, module):
|
||||
if hasattr(module, "weight") and module.weight is not None:
|
||||
@@ -800,6 +792,9 @@ class SpeechT5ForTextToSpeechTester:
|
||||
vocab_size=81,
|
||||
num_mel_bins=20,
|
||||
reduction_factor=2,
|
||||
speech_decoder_postnet_layers=2,
|
||||
speech_decoder_postnet_units=32,
|
||||
speech_decoder_prenet_units=32,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
@@ -813,6 +808,9 @@ class SpeechT5ForTextToSpeechTester:
|
||||
self.vocab_size = vocab_size
|
||||
self.num_mel_bins = num_mel_bins
|
||||
self.reduction_factor = reduction_factor
|
||||
self.speech_decoder_postnet_layers = speech_decoder_postnet_layers
|
||||
self.speech_decoder_postnet_units = speech_decoder_postnet_units
|
||||
self.speech_decoder_prenet_units = speech_decoder_prenet_units
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size).clamp(2)
|
||||
@@ -847,6 +845,9 @@ class SpeechT5ForTextToSpeechTester:
|
||||
vocab_size=self.vocab_size,
|
||||
num_mel_bins=self.num_mel_bins,
|
||||
reduction_factor=self.reduction_factor,
|
||||
speech_decoder_postnet_layers=self.speech_decoder_postnet_layers,
|
||||
speech_decoder_postnet_units=self.speech_decoder_postnet_units,
|
||||
speech_decoder_prenet_units=self.speech_decoder_prenet_units,
|
||||
)
|
||||
|
||||
def create_and_check_model_forward(self, config, inputs_dict):
|
||||
@@ -996,10 +997,6 @@ class SpeechT5ForTextToSpeechTest(ModelTesterMixin, unittest.TestCase):
|
||||
if hasattr(module, "bias") and module.bias is not None:
|
||||
module.bias.data.fill_(3)
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
|
||||
@require_torch
|
||||
@require_sentencepiece
|
||||
@@ -1046,6 +1043,9 @@ class SpeechT5ForSpeechToSpeechTester:
|
||||
vocab_size=81,
|
||||
num_mel_bins=20,
|
||||
reduction_factor=2,
|
||||
speech_decoder_postnet_layers=2,
|
||||
speech_decoder_postnet_units=32,
|
||||
speech_decoder_prenet_units=32,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
@@ -1065,6 +1065,9 @@ class SpeechT5ForSpeechToSpeechTester:
|
||||
self.vocab_size = vocab_size
|
||||
self.num_mel_bins = num_mel_bins
|
||||
self.reduction_factor = reduction_factor
|
||||
self.speech_decoder_postnet_layers = speech_decoder_postnet_layers
|
||||
self.speech_decoder_postnet_units = speech_decoder_postnet_units
|
||||
self.speech_decoder_prenet_units = speech_decoder_prenet_units
|
||||
|
||||
def prepare_config_and_inputs(self):
|
||||
input_values = floats_tensor([self.batch_size, self.encoder_seq_length], scale=1.0)
|
||||
@@ -1105,6 +1108,9 @@ class SpeechT5ForSpeechToSpeechTester:
|
||||
vocab_size=self.vocab_size,
|
||||
num_mel_bins=self.num_mel_bins,
|
||||
reduction_factor=self.reduction_factor,
|
||||
speech_decoder_postnet_layers=self.speech_decoder_postnet_layers,
|
||||
speech_decoder_postnet_units=self.speech_decoder_postnet_units,
|
||||
speech_decoder_prenet_units=self.speech_decoder_prenet_units,
|
||||
)
|
||||
|
||||
def create_and_check_model_forward(self, config, inputs_dict):
|
||||
@@ -1416,10 +1422,6 @@ class SpeechT5ForSpeechToSpeechTest(ModelTesterMixin, unittest.TestCase):
|
||||
if hasattr(module, "masked_spec_embed") and module.masked_spec_embed is not None:
|
||||
module.masked_spec_embed.data.fill_(3)
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
|
||||
@require_torch
|
||||
@require_sentencepiece
|
||||
@@ -1478,6 +1480,7 @@ class SpeechT5HifiGanTester:
|
||||
def get_config(self):
|
||||
return SpeechT5HifiGanConfig(
|
||||
model_in_dim=self.num_mel_bins,
|
||||
upsample_initial_channel=32,
|
||||
)
|
||||
|
||||
def create_and_check_model(self, config, input_values):
|
||||
@@ -1562,10 +1565,6 @@ class SpeechT5HifiGanTest(ModelTesterMixin, unittest.TestCase):
|
||||
def test_retain_grad_hidden_states_attentions(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
# skip because it fails on automapping of SpeechT5HifiGanConfig
|
||||
def test_save_load_fast_init_from_base(self):
|
||||
pass
|
||||
|
||||
@@ -58,9 +58,9 @@ class SwiftFormerModelTester:
|
||||
hidden_dropout_prob=0.1,
|
||||
attention_probs_dropout_prob=0.1,
|
||||
image_size=224,
|
||||
num_labels=1000,
|
||||
layer_depths=[3, 3, 6, 4],
|
||||
embed_dims=[48, 56, 112, 220],
|
||||
num_labels=3,
|
||||
layer_depths=[1, 1, 1, 1],
|
||||
embed_dims=[16, 16, 32, 32],
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
@@ -272,10 +272,6 @@ class SwiftFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
|
||||
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
|
||||
)
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
|
||||
# We will verify our results on an image of cute cats
|
||||
def prepare_img():
|
||||
|
||||
@@ -21,8 +21,8 @@ import unittest
|
||||
|
||||
from huggingface_hub import hf_hub_download
|
||||
|
||||
from transformers import TableTransformerConfig, is_timm_available, is_vision_available
|
||||
from transformers.testing_utils import require_timm, require_vision, slow, torch_device
|
||||
from transformers import ResNetConfig, TableTransformerConfig, is_torch_available, is_vision_available
|
||||
from transformers.testing_utils import require_timm, require_torch, require_vision, slow, torch_device
|
||||
|
||||
from ...generation.test_utils import GenerationTesterMixin
|
||||
from ...test_configuration_common import ConfigTester
|
||||
@@ -30,10 +30,10 @@ from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_
|
||||
from ...test_pipeline_mixin import PipelineTesterMixin
|
||||
|
||||
|
||||
if is_timm_available():
|
||||
if is_torch_available():
|
||||
import torch
|
||||
|
||||
from transformers import ResNetConfig, TableTransformerForObjectDetection, TableTransformerModel
|
||||
from transformers import TableTransformerForObjectDetection, TableTransformerModel
|
||||
|
||||
|
||||
if is_vision_available():
|
||||
@@ -49,7 +49,7 @@ class TableTransformerModelTester:
|
||||
batch_size=8,
|
||||
is_training=True,
|
||||
use_labels=True,
|
||||
hidden_size=256,
|
||||
hidden_size=32,
|
||||
num_hidden_layers=2,
|
||||
num_attention_heads=8,
|
||||
intermediate_size=4,
|
||||
@@ -61,7 +61,7 @@ class TableTransformerModelTester:
|
||||
min_size=200,
|
||||
max_size=200,
|
||||
n_targets=8,
|
||||
num_labels=91,
|
||||
num_labels=3,
|
||||
):
|
||||
self.parent = parent
|
||||
self.batch_size = batch_size
|
||||
@@ -107,6 +107,16 @@ class TableTransformerModelTester:
|
||||
return config, pixel_values, pixel_mask, labels
|
||||
|
||||
def get_config(self):
|
||||
resnet_config = ResNetConfig(
|
||||
num_channels=3,
|
||||
embeddings_size=10,
|
||||
hidden_sizes=[10, 20, 30, 40],
|
||||
depths=[1, 1, 2, 1],
|
||||
hidden_act="relu",
|
||||
num_labels=3,
|
||||
out_features=["stage2", "stage3", "stage4"],
|
||||
out_indices=[2, 3, 4],
|
||||
)
|
||||
return TableTransformerConfig(
|
||||
d_model=self.hidden_size,
|
||||
encoder_layers=self.num_hidden_layers,
|
||||
@@ -119,6 +129,8 @@ class TableTransformerModelTester:
|
||||
attention_dropout=self.attention_probs_dropout_prob,
|
||||
num_queries=self.num_queries,
|
||||
num_labels=self.num_labels,
|
||||
use_timm_backbone=False,
|
||||
backbone_config=resnet_config,
|
||||
)
|
||||
|
||||
def prepare_config_and_inputs_for_common(self):
|
||||
@@ -175,19 +187,19 @@ class TableTransformerModelTester:
|
||||
self.parent.assertEqual(result.pred_boxes.shape, (self.batch_size, self.num_queries, 4))
|
||||
|
||||
|
||||
@require_timm
|
||||
@require_torch
|
||||
class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
all_model_classes = (
|
||||
(
|
||||
TableTransformerModel,
|
||||
TableTransformerForObjectDetection,
|
||||
)
|
||||
if is_timm_available()
|
||||
if is_torch_available()
|
||||
else ()
|
||||
)
|
||||
pipeline_model_mapping = (
|
||||
{"feature-extraction": TableTransformerModel, "object-detection": TableTransformerForObjectDetection}
|
||||
if is_timm_available()
|
||||
if is_torch_available()
|
||||
else {}
|
||||
)
|
||||
is_encoder_decoder = True
|
||||
@@ -453,6 +465,7 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, Pipelin
|
||||
|
||||
# let's set num_channels to 1
|
||||
config.num_channels = 1
|
||||
config.backbone_config.num_channels = 1
|
||||
|
||||
for model_class in self.all_model_classes:
|
||||
model = model_class(config)
|
||||
@@ -486,10 +499,6 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, Pipelin
|
||||
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
|
||||
)
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
|
||||
TOLERANCE = 1e-4
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ class TimmBackboneModelTester:
|
||||
out_indices=None,
|
||||
out_features=None,
|
||||
stage_names=None,
|
||||
backbone="resnet50",
|
||||
backbone="resnet18",
|
||||
batch_size=3,
|
||||
image_size=32,
|
||||
num_channels=3,
|
||||
@@ -196,7 +196,7 @@ class TimmBackboneModelTest(ModelTesterMixin, BackboneTesterMixin, PipelineTeste
|
||||
def test_can_use_safetensors(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
@unittest.skip("Need to use a timm backbone and there is no tiny model available.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
|
||||
@@ -67,8 +67,8 @@ class TvltModelTester:
|
||||
num_image_channels=3,
|
||||
num_audio_channels=1,
|
||||
num_frames=2,
|
||||
hidden_size=128,
|
||||
num_hidden_layers=12,
|
||||
hidden_size=32,
|
||||
num_hidden_layers=3,
|
||||
num_attention_heads=4,
|
||||
intermediate_size=128,
|
||||
hidden_act="gelu",
|
||||
@@ -79,7 +79,7 @@ class TvltModelTester:
|
||||
qkv_bias=True,
|
||||
use_mean_pooling=True,
|
||||
decoder_num_attention_heads=4,
|
||||
decoder_hidden_size=64,
|
||||
decoder_hidden_size=32,
|
||||
decoder_num_hidden_layers=2,
|
||||
decoder_intermediate_size=128,
|
||||
image_mask_ratio=0.75,
|
||||
@@ -542,10 +542,6 @@ class TvltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
|
||||
check_hidden_states_output(inputs_dict, config, model_class)
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
|
||||
# We will verify our results on a video of eating spaghetti
|
||||
# Frame indices used: [164 168 172 176 181 185 189 193 198 202 206 210 215 219 223 227]
|
||||
|
||||
@@ -51,7 +51,7 @@ class UperNetModelTester:
|
||||
num_channels=3,
|
||||
num_stages=4,
|
||||
hidden_sizes=[10, 20, 30, 40],
|
||||
depths=[2, 2, 3, 2],
|
||||
depths=[1, 1, 1, 1],
|
||||
is_training=True,
|
||||
use_labels=True,
|
||||
intermediate_size=37,
|
||||
@@ -106,12 +106,12 @@ class UperNetModelTester:
|
||||
def get_config(self):
|
||||
return UperNetConfig(
|
||||
backbone_config=self.get_backbone_config(),
|
||||
hidden_size=512,
|
||||
hidden_size=64,
|
||||
pool_scales=[1, 2, 3, 6],
|
||||
use_auxiliary_head=True,
|
||||
auxiliary_loss_weight=0.4,
|
||||
auxiliary_in_channels=40,
|
||||
auxiliary_channels=256,
|
||||
auxiliary_channels=32,
|
||||
auxiliary_num_convs=1,
|
||||
auxiliary_concat_input=False,
|
||||
loss_ignore_index=255,
|
||||
@@ -207,10 +207,6 @@ class UperNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
|
||||
def test_multi_gpu_data_parallel_forward(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
def test_hidden_states_output(self):
|
||||
def check_hidden_states_output(inputs_dict, config, model_class):
|
||||
model = model_class(config)
|
||||
|
||||
@@ -130,6 +130,10 @@ class VideoMAEModelTester:
|
||||
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
|
||||
is_decoder=False,
|
||||
initializer_range=self.initializer_range,
|
||||
decoder_hidden_size=self.hidden_size,
|
||||
decoder_intermediate_size=self.intermediate_size,
|
||||
decoder_num_attention_heads=self.num_attention_heads,
|
||||
decoder_num_hidden_layers=self.num_hidden_layers,
|
||||
)
|
||||
|
||||
def create_and_check_model(self, config, pixel_values, labels):
|
||||
@@ -344,10 +348,6 @@ class VideoMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
|
||||
|
||||
check_hidden_states_output(inputs_dict, config, model_class)
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
|
||||
# We will verify our results on a video of eating spaghetti
|
||||
# Frame indices used: [164 168 172 176 181 185 189 193 198 202 206 210 215 219 223 227]
|
||||
|
||||
@@ -118,6 +118,10 @@ class ViTMAEModelTester:
|
||||
is_decoder=False,
|
||||
initializer_range=self.initializer_range,
|
||||
mask_ratio=self.mask_ratio,
|
||||
decoder_hidden_size=self.hidden_size,
|
||||
decoder_intermediate_size=self.intermediate_size,
|
||||
decoder_num_attention_heads=self.num_attention_heads,
|
||||
decoder_num_hidden_layers=self.num_hidden_layers,
|
||||
)
|
||||
|
||||
def create_and_check_model(self, config, pixel_values, labels):
|
||||
@@ -279,10 +283,6 @@ class ViTMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
|
||||
def test_model_outputs_equivalence(self):
|
||||
pass
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
@slow
|
||||
def test_model_from_pretrained(self):
|
||||
for model_name in VIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
|
||||
|
||||
@@ -55,8 +55,8 @@ class VivitModelTester:
|
||||
num_frames=8, # decreased, because default 32 takes too much RAM at inference
|
||||
tubelet_size=[2, 4, 4],
|
||||
num_channels=3,
|
||||
hidden_size=768,
|
||||
num_hidden_layers=5,
|
||||
hidden_size=32,
|
||||
num_hidden_layers=2,
|
||||
num_attention_heads=4,
|
||||
intermediate_size=37,
|
||||
hidden_act="gelu_fast",
|
||||
@@ -310,10 +310,6 @@ class VivitModelTest(ModelTesterMixin, unittest.TestCase):
|
||||
|
||||
check_hidden_states_output(inputs_dict, config, model_class)
|
||||
|
||||
@unittest.skip("Will be fixed soon by reducing the size of the model used for common tests.")
|
||||
def test_model_is_small(self):
|
||||
pass
|
||||
|
||||
|
||||
# We will verify our results on a video of eating spaghetti
|
||||
# Frame indices used: [164 168 172 176 181 185 189 193 198 202 206 210 215 219 223 227]
|
||||
|
||||
@@ -2708,7 +2708,6 @@ class ModelTesterMixin:
|
||||
def test_model_is_small(self):
|
||||
# Just a consistency check to make sure we are not running tests on 80M parameter models.
|
||||
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
|
||||
# print(config)
|
||||
|
||||
for model_class in self.all_model_classes:
|
||||
model = model_class(config)
|
||||
|
||||
Reference in New Issue
Block a user