Add tests for batching support (#29297)

* add tests for batching support

* Update src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* Update tests/test_modeling_common.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* fixes and comments

* use cosine distance for conv models

* skip mra model testing

* Update tests/models/vilt/test_modeling_vilt.py

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>

* finalize and make style

* check model type by input names

* Update tests/models/vilt/test_modeling_vilt.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* fixed batch size for all testers

* Revert "fixed batch size for all testers"

This reverts commit 525f3a0a058f069fbda00352cf202b728d40df99.

* add batch_size for all testers

* dict from model output

* do not skip layoutlm

* bring back some code from git revert

* Update tests/test_modeling_common.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* Update tests/test_modeling_common.py

Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>

* clean-up

* where did minus go in tolerance

* make whisper happy

* deal with consequences of losing minus

* deal with consequences of losing minus

* maskformer needs its own test for happiness

* fix more models

* tag flaky CV models from Amy's approval

* make codestyle

---------

Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
Co-authored-by: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
This commit is contained in:
Raushan Turganbay
2024-03-12 22:46:19 +05:00
committed by GitHub
parent 11163fff58
commit 8e64ba2890
48 changed files with 350 additions and 67 deletions

View File

@@ -405,6 +405,7 @@ class AlignModelTester:
self.parent = parent
self.text_model_tester = AlignTextModelTester(parent, **text_kwargs)
self.vision_model_tester = AlignVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):

View File

@@ -380,6 +380,7 @@ class AltCLIPModelTester:
self.parent = parent
self.text_model_tester = AltCLIPTextModelTester(parent, **text_kwargs)
self.vision_model_tester = AltCLIPVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):

View File

@@ -107,6 +107,7 @@ class AutoformerModelTester:
cardinality=[self.cardinality],
embedding_dimension=[self.embedding_dimension],
moving_average=self.moving_average,
scaling="std", # we need std to get non-zero `loc`
)
def prepare_autoformer_inputs_dict(self, config):

View File

@@ -67,7 +67,7 @@ class BarkSemanticModelTester:
def __init__(
self,
parent,
batch_size=2,
batch_size=3, # need batch_size != num_hidden_layers
seq_length=4,
is_training=False, # for now training is not supported
use_input_mask=True,
@@ -203,7 +203,7 @@ class BarkCoarseModelTester:
def __init__(
self,
parent,
batch_size=2,
batch_size=3, # need batch_size != num_hidden_layers
seq_length=4,
is_training=False, # for now training is not supported
use_input_mask=True,
@@ -339,7 +339,7 @@ class BarkFineModelTester:
def __init__(
self,
parent,
batch_size=2,
batch_size=3, # need batch_size != num_hidden_layers
seq_length=4,
is_training=False, # for now training is not supported
use_input_mask=True,

View File

@@ -387,6 +387,7 @@ class BlipModelTester:
self.parent = parent
self.text_model_tester = BlipTextModelTester(parent, **text_kwargs)
self.vision_model_tester = BlipVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):
@@ -596,6 +597,7 @@ class BlipTextRetrievalModelTester:
self.parent = parent
self.text_model_tester = BlipTextModelTester(parent, **text_kwargs)
self.vision_model_tester = BlipVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):
@@ -643,6 +645,7 @@ class BlipTextImageModelsModelTester:
self.parent = parent
self.text_model_tester = BlipTextModelTester(parent, **text_kwargs)
self.vision_model_tester = BlipVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):
@@ -691,6 +694,7 @@ class BlipVQAModelTester:
self.parent = parent
self.text_model_tester = BlipTextModelTester(parent, **text_kwargs)
self.vision_model_tester = BlipVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):

View File

@@ -390,6 +390,7 @@ class Blip2ForConditionalGenerationDecoderOnlyModelTester:
self.vision_model_tester = Blip2VisionModelTester(parent, **vision_kwargs)
self.qformer_model_tester = Blip2QFormerModelTester(parent, **qformer_kwargs)
self.text_model_tester = Blip2TextModelDecoderOnlyTester(parent, **text_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
self.num_query_tokens = num_query_tokens
@@ -616,6 +617,7 @@ class Blip2ModelTester:
self.vision_model_tester = Blip2VisionModelTester(parent, **vision_kwargs)
self.qformer_model_tester = Blip2QFormerModelTester(parent, **qformer_kwargs)
self.text_model_tester = Blip2TextModelTester(parent, **text_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
self.num_query_tokens = num_query_tokens

View File

@@ -510,6 +510,7 @@ class ChineseCLIPModelTester:
self.parent = parent
self.text_model_tester = ChineseCLIPTextModelTester(parent, **text_kwargs)
self.vision_model_tester = ChineseCLIPVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):

View File

@@ -466,6 +466,7 @@ class ClapModelTester:
self.parent = parent
self.text_model_tester = ClapTextModelTester(parent, **text_kwargs)
self.audio_model_tester = ClapAudioModelTester(parent, **audio_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):

View File

@@ -437,6 +437,7 @@ class CLIPModelTester:
self.parent = parent
self.text_model_tester = CLIPTextModelTester(parent, **text_kwargs)
self.vision_model_tester = CLIPVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):

View File

@@ -388,6 +388,7 @@ class CLIPSegModelTester:
self.parent = parent
self.text_model_tester = CLIPSegTextModelTester(parent, **text_kwargs)
self.vision_model_tester = CLIPSegVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
self.extract_layers = extract_layers

View File

@@ -344,6 +344,7 @@ class ClvpModelForConditionalGenerationTester:
self.parent = parent
self.clvp_encoder_tester = ClvpEncoderTester(parent)
self.is_training = is_training
self.batch_size = self.clvp_encoder_tester.batch_size # need bs for batching_equivalence test
def get_config(self):
decoder_config = ClvpDecoderConfig(

View File

@@ -194,6 +194,7 @@ class ConditionalDetrModelTest(ModelTesterMixin, GenerationTesterMixin, Pipeline
test_pruning = False
test_head_masking = False
test_missing_keys = False
zero_init_hidden_state = True
# special case for head models
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):

View File

@@ -57,7 +57,7 @@ class CpmAntModelTester:
prompt_length=8,
prompt_types=8,
segment_types=8,
init_std=1.0,
init_std=0.02,
return_dict=True,
):
self.parent = parent

View File

@@ -194,6 +194,7 @@ class DetrModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
test_pruning = False
test_head_masking = False
test_missing_keys = False
zero_init_hidden_state = True
# special case for head models
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):

View File

@@ -19,7 +19,7 @@ import unittest
from transformers import DPTConfig
from transformers.file_utils import is_torch_available, is_vision_available
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import is_flaky, require_torch, require_vision, slow, torch_device
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
@@ -306,6 +306,10 @@ class DPTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
with self.assertRaises(ValueError):
_ = DPTForDepthEstimation(config)
@is_flaky(description="is_flaky https://github.com/huggingface/transformers/issues/29516")
def test_batching_equivalence(self):
super().test_batching_equivalence()
# We will verify our results on an image of cute cats
def prepare_img():

View File

@@ -33,11 +33,7 @@ from transformers.testing_utils import (
)
from ...test_configuration_common import ConfigTester
from ...test_modeling_common import (
ModelTesterMixin,
_config_zero_init,
floats_tensor,
)
from ...test_modeling_common import ModelTesterMixin, _config_zero_init, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin
@@ -107,6 +103,15 @@ class EncodecModelTester:
config, inputs_dict = self.prepare_config_and_inputs()
return config, inputs_dict
def prepare_config_and_inputs_for_model_class(self, model_class):
# Builds on the regular config/inputs and adds the extra inputs `audio_codes` and `audio_scales`.
# NOTE(review): `model_class` is not used in this body.
config, inputs_dict = self.prepare_config_and_inputs()
# random integer codes of shape [1, batch_size, 1, num_channels], drawn with `codebook_size`
# as the vocabulary size and cast to int32
inputs_dict["audio_codes"] = ids_tensor([1, self.batch_size, 1, self.num_channels], self.codebook_size).type(
torch.int32
)
# a single `None` scale entry
inputs_dict["audio_scales"] = [None]
return config, inputs_dict
def get_config(self):
return EncodecConfig(
audio_channels=self.num_channels,

View File

@@ -347,6 +347,13 @@ class FastSpeech2ConformerModelTest(ModelTesterMixin, unittest.TestCase):
def test_model_common_attributes(self):
pass
@unittest.skip(
"FastSpeech2Conformer predicts durations in linear domain during inference"
"Even small differences on hidden states lead to different durations, due to `torch.round`"
)
def test_batching_equivalence(self):
pass
@require_torch
@require_g2p_en
@@ -762,6 +769,13 @@ class FastSpeech2ConformerWithHifiGanTest(ModelTesterMixin, unittest.TestCase):
def test_model_common_attributes(self):
pass
@unittest.skip(
"FastSpeech2Conformer predicts durations in linear domain during inference"
"Even small differences on hidden states lead to different durations, due to `torch.round`"
)
def test_batching_equivalence(self):
pass
@require_torch
@require_g2p_en

View File

@@ -836,6 +836,7 @@ class FlavaModelTester:
self.projection_dim = projection_dim
self.initializer_range = initializer_range
self.layer_norm_eps = layer_norm_eps
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
def test_config(self):
self.config_tester.run_common_tests()

View File

@@ -507,6 +507,7 @@ class GroupViTModelTester:
self.parent = parent
self.text_model_tester = GroupViTTextModelTester(parent, **text_kwargs)
self.vision_model_tester = GroupViTVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):

View File

@@ -279,6 +279,10 @@ class InformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
def test_determinism(self):
pass
@unittest.skip("randomly selects U keys while calculating attentions")
def test_batching_equivalence(self):
pass
@unittest.skip(
reason="This architecure seem to not compute gradients properly when using GC, check: https://github.com/huggingface/transformers/pull/27124"
)

View File

@@ -397,6 +397,7 @@ class InstructBlipForConditionalGenerationDecoderOnlyModelTester:
self.vision_model_tester = InstructBlipVisionModelTester(parent, **vision_kwargs)
self.qformer_model_tester = InstructBlipQFormerModelTester(parent, **qformer_kwargs)
self.text_model_tester = InstructBlipTextModelDecoderOnlyTester(parent, **text_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
self.num_query_tokens = num_query_tokens

View File

@@ -197,6 +197,7 @@ class Kosmos2ModelTester:
self.parent = parent
self.text_model_tester = Kosmos2TextModelTester(parent, **text_kwargs)
self.vision_model_tester = Kosmos2VisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.latent_query_num = latent_query_num
self.is_training = is_training

View File

@@ -27,6 +27,7 @@ from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
import torch.nn.functional as F
from transformers import (
LayoutLMv2Config,
@@ -442,6 +443,64 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
msg=f"Parameter {name} of model {model_class} seems not properly initialized",
)
def test_batching_equivalence(self):
# Runs each model class on the full batched input and on a leading slice of that input, then
# checks that the first row of every batched output is close to the single-row output.
# Closeness is measured with cosine distance and must be <= 1e-03.
def equivalence(tensor1, tensor2):
# cosine distance = 1 - cosine similarity of the two tensors, flattened and cast to float
return 1.0 - F.cosine_similarity(tensor1.float().flatten(), tensor2.float().flatten(), dim=0, eps=0)
def recursive_check(batched_object, single_row_object, model_name, key):
# lists/tuples are compared element by element, `None` outputs are ignored,
# anything else is treated as a tensor
if isinstance(batched_object, (list, tuple)):
for batched_object_value, single_row_object_value in zip(batched_object, single_row_object):
recursive_check(batched_object_value, single_row_object_value, model_name, key)
elif batched_object is None:
return
else:
# only the first row of the batched output is compared against the single-row output
batched_row = batched_object[:1]
self.assertFalse(
torch.isnan(batched_row).any(), f"Batched output has `nan` in {model_name} for key={key}"
)
self.assertFalse(
torch.isinf(batched_row).any(), f"Batched output has `inf` in {model_name} for key={key}"
)
self.assertFalse(
torch.isnan(single_row_object).any(), f"Single row output has `nan` in {model_name} for key={key}"
)
self.assertFalse(
torch.isinf(single_row_object).any(), f"Single row output has `inf` in {model_name} for key={key}"
)
self.assertTrue(
(equivalence(batched_row, single_row_object)) <= 1e-03,
msg=(
f"Batched and Single row outputs are not equal in {model_name} for key={key}. "
f"Difference={equivalence(batched_row, single_row_object)}."
),
)
config, batched_input = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
# hidden states are requested so that they are part of the compared outputs
config.output_hidden_states = True
model_name = model_class.__name__
batched_input_prepared = self._prepare_for_class(batched_input, model_class)
model = model_class(config).to(torch_device).eval()
batch_size = self.model_tester.batch_size
# build the single-row input by keeping the first `shape[0] // batch_size` entries of each tensor
single_row_input = {}
for key, value in batched_input_prepared.items():
if isinstance(value, torch.Tensor) and value.shape[0] % batch_size == 0:
single_batch_shape = value.shape[0] // batch_size
single_row_input[key] = value[:single_batch_shape]
elif hasattr(value, "tensor"):
# layoutlmv2 uses ImageList instead of pixel values (needed for torchscript)
# NOTE(review): `single_batch_shape` is only assigned in the tensor branch above, so this
# relies on a tensor input having been seen earlier in the loop - confirm input ordering
single_row_input[key] = value.tensor[:single_batch_shape]
# the same model instance is run on both inputs without gradient tracking
with torch.no_grad():
model_batched_output = model(**batched_input_prepared)
model_row_output = model(**single_row_input)
for key in model_batched_output:
recursive_check(model_batched_output[key], model_row_output[key], model_name, key)
def prepare_layoutlmv2_batch_inputs():
# Here we prepare a batch of 2 sequences to test a LayoutLMv2 forward pass on:

View File

@@ -388,6 +388,10 @@ class LongformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
# longformer cannot keep gradients in attentions or hidden states
return
@unittest.skip("LongFormer calculates global attn only when attn_mask has non-zero elements")
def test_batching_equivalence(self):
return
@require_torch
@require_sentencepiece

View File

@@ -39,6 +39,7 @@ from ...test_pipeline_mixin import PipelineTesterMixin
if is_torch_available():
import torch
import torch.nn.functional as F
from transformers import MaskFormerForInstanceSegmentation, MaskFormerModel
@@ -206,6 +207,7 @@ class MaskFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
test_pruning = False
test_head_masking = False
test_missing_keys = False
zero_init_hidden_state = True
def setUp(self):
self.model_tester = MaskFormerModelTester(self)
@@ -381,6 +383,67 @@ class MaskFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
self.assertIsNotNone(outputs.auxiliary_logits)
self.assertEqual(len(outputs.auxiliary_logits), self.model_tester.num_channels - 1)
def test_batching_equivalence(self):
# Runs each model class on the full batched input and on a leading slice of that input, then
# checks that the first row of every batched output is close to the single-row output.
# Closeness is measured with cosine distance and must be <= 1e-03.
def equivalence(tensor1, tensor2):
# cosine distance = 1 - cosine similarity of the two tensors, flattened and cast to float
return 1.0 - F.cosine_similarity(tensor1.float().flatten(), tensor2.float().flatten(), dim=0, eps=0).max()
def recursive_check(batched_object, single_row_object, model_name, key):
# lists/tuples are compared element by element, `None` outputs are ignored,
# anything else is treated as a tensor
if isinstance(batched_object, (list, tuple)):
for batched_object_value, single_row_object_value in zip(batched_object, single_row_object):
recursive_check(batched_object_value, single_row_object_value, model_name, key)
elif batched_object is None:
return
else:
# only the first row of the batched output is compared against the single-row output
batched_row = batched_object[:1]
self.assertFalse(
torch.isnan(batched_row).any(), f"Batched output has `nan` in {model_name} for key={key}"
)
self.assertFalse(
torch.isinf(batched_row).any(), f"Batched output has `inf` in {model_name} for key={key}"
)
self.assertFalse(
torch.isnan(single_row_object).any(), f"Single row output has `nan` in {model_name} for key={key}"
)
self.assertFalse(
torch.isinf(single_row_object).any(), f"Single row output has `inf` in {model_name} for key={key}"
)
self.assertTrue(
(equivalence(batched_row, single_row_object)) <= 1e-03,
msg=(
f"Batched and Single row outputs are not equal in {model_name} for key={key}. "
f"Difference={equivalence(batched_row, single_row_object)}."
),
)
config, batched_input = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
# hidden states are requested so that they are part of the model outputs
config.output_hidden_states = True
model_name = model_class.__name__
batched_input_prepared = self._prepare_for_class(batched_input, model_class)
model = model_class(config).to(torch_device).eval()
batch_size = self.model_tester.batch_size
# build the single-row input by keeping the first `shape[0] // batch_size` entries of each input
# NOTE(review): every value is indexed via `.shape`, so all prepared inputs are presumably tensors
single_row_input = {}
for key, value in batched_input_prepared.items():
single_batch_shape = value.shape[0] // batch_size
single_row_input[key] = value[:single_batch_shape]
# the same model instance is run on both inputs without gradient tracking
with torch.no_grad():
model_batched_output = model(**batched_input_prepared)
model_row_output = model(**single_row_input)
for key in model_batched_output:
# remove the first zero-init queries to decoder, otherwise cos_similarity = `nan`
# no need to check all hidden_states, already checked separately each one
if key == "transformer_decoder_hidden_states":
model_batched_output[key] = model_batched_output[key][1:]
model_row_output[key] = model_row_output[key][1:]
elif key == "hidden_states":
continue
recursive_check(model_batched_output[key], model_row_output[key], model_name, key)
TOLERANCE = 1e-4

View File

@@ -18,7 +18,7 @@
import unittest
from transformers import MobileNetV2Config
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.testing_utils import is_flaky, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@@ -271,6 +271,10 @@ class MobileNetV2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
model = MobileNetV2Model.from_pretrained(model_name)
self.assertIsNotNone(model)
@is_flaky(description="is_flaky https://github.com/huggingface/transformers/issues/29516")
def test_batching_equivalence(self):
super().test_batching_equivalence()
# We will verify our results on an image of cute cats
def prepare_img():

View File

@@ -378,6 +378,10 @@ class MraModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_training_gradient_checkpointing_use_reentrant_false(self):
pass
@unittest.skip("Model has `nan` in hidden_states, see https://github.com/huggingface/transformers/issues/29373.")
def test_batching_equivalence(self):
pass
@require_torch
class MraModelIntegrationTest(unittest.TestCase):

View File

@@ -103,7 +103,7 @@ class MusicgenDecoderTester:
def __init__(
self,
parent,
batch_size=2,
batch_size=3, # need batch_size != num_hidden_layers
seq_length=7,
is_training=False,
use_labels=False,
@@ -441,7 +441,7 @@ class MusicgenTester:
def __init__(
self,
parent,
batch_size=2,
batch_size=3, # need batch_size != num_hidden_layers
seq_length=7,
is_training=False,
use_labels=False,

View File

@@ -385,6 +385,7 @@ class Owlv2ModelTester:
self.is_training = is_training
self.text_config = self.text_model_tester.get_config().to_dict()
self.vision_config = self.vision_model_tester.get_config().to_dict()
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
def prepare_config_and_inputs(self):
text_config, input_ids, attention_mask = self.text_model_tester.prepare_config_and_inputs()
@@ -591,6 +592,7 @@ class Owlv2ForObjectDetectionTester:
self.is_training = is_training
self.text_config = self.text_model_tester.get_config().to_dict()
self.vision_config = self.vision_model_tester.get_config().to_dict()
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
def prepare_config_and_inputs(self):
text_config, input_ids, attention_mask = self.text_model_tester.prepare_config_and_inputs()

View File

@@ -381,6 +381,7 @@ class OwlViTModelTester:
self.is_training = is_training
self.text_config = self.text_model_tester.get_config().to_dict()
self.vision_config = self.vision_model_tester.get_config().to_dict()
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
def prepare_config_and_inputs(self):
text_config, input_ids, attention_mask = self.text_model_tester.prepare_config_and_inputs()
@@ -585,6 +586,7 @@ class OwlViTForObjectDetectionTester:
self.is_training = is_training
self.text_config = self.text_model_tester.get_config().to_dict()
self.vision_config = self.vision_model_tester.get_config().to_dict()
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
def prepare_config_and_inputs(self):
text_config, input_ids, attention_mask = self.text_model_tester.prepare_config_and_inputs()

View File

@@ -386,6 +386,7 @@ class Pix2StructModelTester:
self.parent = parent
self.text_model_tester = Pix2StructTextModelTester(parent, **text_kwargs)
self.vision_model_tester = Pix2StructVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):

View File

@@ -389,6 +389,7 @@ class SiglipModelTester:
self.parent = parent
self.text_model_tester = SiglipTextModelTester(parent, **text_kwargs)
self.vision_model_tester = SiglipVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
# Copied from tests.models.clip.test_modeling_clip.CLIPModelTester.prepare_config_and_inputs

View File

@@ -916,6 +916,10 @@ class SpeechT5ForTextToSpeechTest(ModelTesterMixin, unittest.TestCase):
def test_determinism(self):
pass
@unittest.skip("skipped because there is always dropout in SpeechT5SpeechDecoderPrenet")
def test_batching_equivalence(self):
pass
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -1438,6 +1442,10 @@ class SpeechT5ForSpeechToSpeechTest(ModelTesterMixin, unittest.TestCase):
def test_determinism(self):
pass
@unittest.skip("skipped because there is always dropout in SpeechT5SpeechDecoderPrenet")
def test_batching_equivalence(self):
pass
def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True

View File

@@ -209,6 +209,7 @@ class TableTransformerModelTest(ModelTesterMixin, GenerationTesterMixin, Pipelin
test_pruning = False
test_head_masking = False
test_missing_keys = False
zero_init_hidden_state = True
# special case for head models
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):

View File

@@ -104,6 +104,7 @@ class TimeSeriesTransformerModelTester:
num_static_categorical_features=1,
cardinality=[self.cardinality],
embedding_dimension=[self.embedding_dimension],
scaling="std", # we need std to get non-zero `loc`
)
def prepare_time_series_transformer_inputs_dict(self, config):

View File

@@ -66,13 +66,13 @@ class UnivNetModelTester:
def prepare_noise_sequence(self):
generator = torch.manual_seed(self.seed)
noise_shape = (self.seq_length, self.in_channels)
noise_shape = (self.batch_size, self.seq_length, self.in_channels)
# Create noise on CPU for reproducibility
noise_sequence = torch.randn(noise_shape, generator=generator, dtype=torch.float)
return noise_sequence
def prepare_config_and_inputs(self):
spectrogram = floats_tensor([self.seq_length, self.num_mel_bins], scale=1.0)
spectrogram = floats_tensor([self.batch_size, self.seq_length, self.num_mel_bins], scale=1.0)
noise_sequence = self.prepare_noise_sequence()
noise_sequence = noise_sequence.to(spectrogram.device)
config = self.get_config()
@@ -89,7 +89,7 @@ class UnivNetModelTester:
def create_and_check_model(self, config, spectrogram, noise_sequence):
model = UnivNetModel(config=config).to(torch_device).eval()
result = model(spectrogram, noise_sequence)[0]
self.parent.assertEqual(result.shape, (1, self.seq_length * 256))
self.parent.assertEqual(result.shape, (self.batch_size, self.seq_length * 256))
def prepare_config_and_inputs_for_common(self):
config, spectrogram, noise_sequence = self.prepare_config_and_inputs()
@@ -182,8 +182,8 @@ class UnivNetModelTest(ModelTesterMixin, unittest.TestCase):
model.to(torch_device)
model.eval()
batched_spectrogram = inputs["input_features"].unsqueeze(0).repeat(2, 1, 1)
batched_noise_sequence = inputs["noise_sequence"].unsqueeze(0).repeat(2, 1, 1)
batched_spectrogram = inputs["input_features"]
batched_noise_sequence = inputs["noise_sequence"]
with torch.no_grad():
batched_outputs = model(
batched_spectrogram.to(torch_device),
@@ -205,37 +205,11 @@ class UnivNetModelTest(ModelTesterMixin, unittest.TestCase):
model.eval()
with torch.no_grad():
outputs = model(inputs["input_features"].to(torch_device), inputs["noise_sequence"].to(torch_device))[
0
]
outputs = model(
inputs["input_features"][:1].to(torch_device), inputs["noise_sequence"][:1].to(torch_device)
)[0]
self.assertTrue(outputs.shape[0] == 1, msg="Unbatched input should create batched output with bsz = 1")
def test_unbatched_batched_outputs_consistency(self):
config, inputs = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
model.to(torch_device)
model.eval()
unbatched_spectrogram = inputs["input_features"].detach().clone()
unbatched_noise_sequence = inputs["noise_sequence"].detach().clone()
batched_spectrogram = inputs["input_features"].unsqueeze(0)
batched_noise_sequence = inputs["noise_sequence"].unsqueeze(0)
with torch.no_grad():
unbatched_outputs = model(
unbatched_spectrogram.to(torch_device),
unbatched_noise_sequence.to(torch_device),
)[0]
batched_outputs = model(
batched_spectrogram.to(torch_device),
batched_noise_sequence.to(torch_device),
)[0]
torch.testing.assert_close(unbatched_outputs, batched_outputs)
@require_torch_gpu
@slow

View File

@@ -345,6 +345,12 @@ class ViltModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_determinism(self):
pass
@unittest.skip(
"VilT samples image tokens from a multinomial distribution, resulting in not deterministic hidden states"
)
def test_batching_equivalence(self):
pass
@unittest.skip(
reason="""VilT samples image tokens from a multinomial distribution, resulting in not deterministic
hidden states"""

View File

@@ -18,7 +18,7 @@
import unittest
from transformers import ViTHybridConfig
from transformers.testing_utils import require_accelerate, require_torch, require_vision, slow, torch_device
from transformers.testing_utils import is_flaky, require_accelerate, require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester
@@ -221,6 +221,10 @@ class ViTHybridModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCas
model = ViTHybridModel.from_pretrained(model_name)
self.assertIsNotNone(model)
@is_flaky(description="is_flaky https://github.com/huggingface/transformers/issues/29516")
def test_batching_equivalence(self):
super().test_batching_equivalence()
# We will verify our results on an image of cute cats
def prepare_img():

View File

@@ -270,6 +270,10 @@ class ViTMAEModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_model_outputs_equivalence(self):
pass
@unittest.skip(reason="ViTMAE returns a random mask + ids_restore in each forward pass")
def test_batching_equivalence(self):
pass
@slow
def test_model_from_pretrained(self):
for model_name in VIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:

View File

@@ -216,6 +216,10 @@ class VitsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
def test_determinism(self):
pass
@unittest.skip("VITS is not deterministic")
def test_batching_equivalence(self):
pass
@is_flaky(
max_attempts=3,
description="Weight initialisation for the VITS conv layers sometimes exceeds the kaiming normal range",

View File

@@ -190,7 +190,7 @@ class WhisperModelTester:
def __init__(
self,
parent,
batch_size=2,
batch_size=3, # need batch_size != num_hidden_layers
seq_length=60,
is_training=True,
use_labels=False,
@@ -1446,6 +1446,7 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
model = WhisperForConditionalGeneration(config).eval().to(torch_device)
input_features = input_dict["input_features"].to(torch_device)
input_features = input_features[:2]
# len = 250 with num_input_frames = 60
long_input_features = torch.cat([input_features.repeat(1, 1, 4), input_features[:, :, :10]], dim=-1)
@@ -2626,7 +2627,7 @@ class WhisperEncoderModelTester:
def __init__(
self,
parent,
batch_size=2,
batch_size=3, # need batch_size != num_hidden_layers
seq_length=60,
is_training=True,
use_labels=True,
@@ -2997,7 +2998,7 @@ class WhisperStandaloneDecoderModelTester:
def __init__(
self,
parent,
batch_size=2,
batch_size=3, # need batch_size != num_hidden_layers
is_training=True,
use_labels=False,
vocab_size=200,

View File

@@ -479,6 +479,7 @@ class XCLIPModelTester:
self.mit_hidden_size = mit_hidden_size
self.text_model_tester = XCLIPTextModelTester(parent, **text_kwargs)
self.vision_model_tester = XCLIPVisionModelTester(parent, **vision_kwargs)
self.batch_size = self.text_model_tester.batch_size # need bs for batching_equivalence test
self.is_training = is_training
def prepare_config_and_inputs(self):

View File

@@ -99,6 +99,7 @@ if is_accelerate_available():
if is_torch_available():
import torch
import torch.nn.functional as F
from safetensors.torch import load_file as safe_load_file
from safetensors.torch import save_file as safe_save_file
from torch import nn
@@ -693,6 +694,99 @@ class ModelTesterMixin:
expected_arg_names = [model.main_input_name]
self.assertListEqual(arg_names[:1], expected_arg_names)
def test_batching_equivalence(self):
    """
    Tests that the model supports batching and that the output is nearly the same for the same input in
    different batch sizes.
    (Why "nearly the same" not "exactly the same"? Batching uses different matmul shapes, which often leads to
    different results: https://github.com/huggingface/transformers/issues/25420#issuecomment-1775317535)

    For every class in `self.all_model_classes`, the model is run once on the full batch produced by the model
    tester and once on the leading `shape[0] // batch_size` entries of each tensor input. Every tensor found in
    the two outputs is then checked for `nan`/`inf` and compared with a tolerance of 1e-03.
    """

    def get_tensor_equivalence_function(batched_input):
        # models operating on continuous spaces have higher abs difference than LMs
        # instead, we can rely on cos distance for image/speech models, similar to `diffusers`
        if "input_ids" not in batched_input:
            return lambda tensor1, tensor2: (
                1.0 - F.cosine_similarity(tensor1.float().flatten(), tensor2.float().flatten(), dim=0, eps=1e-38)
            )
        return lambda tensor1, tensor2: torch.max(torch.abs(tensor1 - tensor2))

    def recursive_check(batched_object, single_row_object, model_name, key):
        # Walk nested containers in lockstep; `key` stays the top-level output name for error messages.
        if isinstance(batched_object, (list, tuple)):
            for batched_object_value, single_row_object_value in zip(batched_object, single_row_object):
                recursive_check(batched_object_value, single_row_object_value, model_name, key)
        elif isinstance(batched_object, dict):
            for batched_object_value, single_row_object_value in zip(
                batched_object.values(), single_row_object.values()
            ):
                recursive_check(batched_object_value, single_row_object_value, model_name, key)
        # do not compare returned loss (0-dim tensor) or codebook ids (int)
        elif batched_object is None or isinstance(batched_object, int):
            return
        elif batched_object.dim() == 0:
            return
        else:
            # indexing the first element does not always work
            # e.g. models that output similarity scores of size (N, M) would need to index [0, 0]
            # NOTE: the index must be a tuple of slices; indexing a tensor with a *list* of slices relies on
            # deprecated legacy sequence-indexing behaviour.
            slice_ids = tuple(slice(0, index) for index in single_row_object.shape)
            batched_row = batched_object[slice_ids]
            self.assertFalse(
                torch.isnan(batched_row).any(), f"Batched output has `nan` in {model_name} for key={key}"
            )
            self.assertFalse(
                torch.isinf(batched_row).any(), f"Batched output has `inf` in {model_name} for key={key}"
            )
            self.assertFalse(
                torch.isnan(single_row_object).any(), f"Single row output has `nan` in {model_name} for key={key}"
            )
            self.assertFalse(
                torch.isinf(single_row_object).any(), f"Single row output has `inf` in {model_name} for key={key}"
            )
            # Compute the metric once: the failure message is built eagerly, so reuse the same value there.
            difference = equivalence(batched_row, single_row_object)
            self.assertTrue(
                difference <= 1e-03,
                msg=(
                    f"Batched and Single row outputs are not equal in {model_name} for key={key}. "
                    f"Difference={difference}."
                ),
            )

    config, batched_input = self.model_tester.prepare_config_and_inputs_for_common()
    # The comparison metric is chosen once, from the common inputs, and reused for every model class.
    equivalence = get_tensor_equivalence_function(batched_input)

    for model_class in self.all_model_classes:
        config.output_hidden_states = True

        model_name = model_class.__name__
        if hasattr(self.model_tester, "prepare_config_and_inputs_for_model_class"):
            config, batched_input = self.model_tester.prepare_config_and_inputs_for_model_class(model_class)
        batched_input_prepared = self._prepare_for_class(batched_input, model_class)
        model = model_class(config).to(torch_device).eval()

        batch_size = self.model_tester.batch_size
        single_row_input = {}
        for key, value in batched_input_prepared.items():
            if isinstance(value, torch.Tensor) and value.shape[0] % batch_size == 0:
                # e.g. musicgen has inputs of size (bs*codebooks). in most cases value.shape[0] == batch_size
                single_batch_shape = value.shape[0] // batch_size
                single_row_input[key] = value[:single_batch_shape]
            else:
                # non-tensor inputs, or tensors whose first dim is not a multiple of the batch size, pass as is
                single_row_input[key] = value

        with torch.no_grad():
            model_batched_output = model(**batched_input_prepared)
            model_row_output = model(**single_row_input)

        # A bare tensor output is wrapped so the per-key loop below handles it uniformly.
        if isinstance(model_batched_output, torch.Tensor):
            model_batched_output = {"model_output": model_batched_output}
            model_row_output = {"model_output": model_row_output}

        for key in model_batched_output:
            # DETR starts from zero-init queries to decoder, leading to cos_similarity = `nan`
            if hasattr(self, "zero_init_hidden_state") and "decoder_hidden_states" in key:
                model_batched_output[key] = model_batched_output[key][1:]
                model_row_output[key] = model_row_output[key][1:]
            recursive_check(model_batched_output[key], model_row_output[key], model_name, key)
def check_training_gradient_checkpointing(self, gradient_checkpointing_kwargs=None):
if not self.model_tester.is_training:
return