From a25fc3592eec7a18aa20fe5d85bd335477896cbc Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Thu, 3 Jul 2025 15:13:06 +0200 Subject: [PATCH] Update expected values (after switching to A10) - part 4 (#39189) * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix --------- Co-authored-by: ydshieh --- tests/models/align/test_modeling_align.py | 3 + .../test_modeling_fastspeech2_conformer.py | 53 +++++++++---- .../models/focalnet/test_modeling_focalnet.py | 14 +++- tests/models/glpn/test_modeling_glpn.py | 3 + tests/models/hiera/test_modeling_hiera.py | 3 + tests/models/levit/test_modeling_levit.py | 13 ++- .../lightglue/test_modeling_lightglue.py | 9 ++- tests/models/mgp_str/test_modeling_mgp_str.py | 3 + tests/models/minimax/test_modeling_minimax.py | 12 ++- tests/models/mixtral/test_modeling_mixtral.py | 23 +++--- .../moonshine/test_modeling_moonshine.py | 32 ++++---- tests/models/mpt/test_modeling_mpt.py | 20 +++-- .../models/musicgen/test_modeling_musicgen.py | 35 ++++---- .../test_modeling_musicgen_melody.py | 14 ++-- tests/models/sam/test_modeling_sam.py | 15 +++- tests/models/sam_hq/test_modeling_sam_hq.py | 79 +++++++++++++------ .../timesformer/test_modeling_timesformer.py | 13 ++- .../test_modeling_timm_wrapper.py | 27 +++++-- .../models/videomae/test_modeling_videomae.py | 12 ++- tests/models/vitpose/test_modeling_vitpose.py | 3 + .../test_modeling_vitpose_backbone.py | 3 + tests/models/vivit/test_modeling_vivit.py | 14 ++-- .../test_modeling_wav2vec2_bert.py | 5 ++ .../test_modeling_wav2vec2_conformer.py | 4 +- tests/models/x_clip/test_modeling_x_clip.py | 24 ++++-- 25 files changed, 298 insertions(+), 138 deletions(-) diff --git a/tests/models/align/test_modeling_align.py b/tests/models/align/test_modeling_align.py index 5f147220da..15c520d1d2 100644 --- a/tests/models/align/test_modeling_align.py +++ b/tests/models/align/test_modeling_align.py @@ -462,6 +462,9 @@ class AlignModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): def test_config(self): self.config_tester.run_common_tests() + def test_batching_equivalence(self, atol=3e-4, rtol=3e-4): + super().test_batching_equivalence(atol=atol, rtol=rtol) + @unittest.skip(reason="Start to fail after using torch `cu118`.") def test_multi_gpu_data_parallel_forward(self): super().test_multi_gpu_data_parallel_forward() diff --git a/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py b/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py index 22201f42b0..ccd88576f8 100644 --- a/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py +++ b/tests/models/fastspeech2_conformer/test_modeling_fastspeech2_conformer.py @@ -24,7 +24,14 @@ from transformers import ( FastSpeech2ConformerWithHifiGanConfig, is_torch_available, ) -from transformers.testing_utils import require_g2p_en, require_torch, require_torch_accelerator, slow, torch_device +from transformers.testing_utils import ( + Expectations, + require_g2p_en, + require_torch, + require_torch_accelerator, + slow, + torch_device, +) from ...test_configuration_common import ConfigTester from ...test_modeling_common import ModelTesterMixin, _config_zero_init, ids_tensor @@ -373,24 +380,38 @@ class FastSpeech2ConformerModelIntegrationTest(unittest.TestCase): # mel-spectrogram is too large (1, 205, 80), so only check top-left 100 elements # fmt: off - expected_mel_spectrogram = torch.tensor( - [ - [-1.2426, -1.7286, -1.6754, -1.7451, -1.6402, -1.5219, -1.4480, -1.3345, -1.4031, -1.4497], - [-0.7858, -1.4966, -1.3602, -1.4876, -1.2949, -1.0723, -1.0021, -0.7553, -0.6521, -0.6929], - [-0.7298, -1.3908, -1.0369, -1.2656, -1.0342, -0.7883, -0.7420, -0.5249, -0.3734, -0.3977], - [-0.4784, -1.3508, -1.1558, -1.4678, -1.2820, -1.0252, -1.0868, -0.9006, -0.8947, -0.8448], - [-0.3963, -1.2895, -1.2813, -1.6147, -1.4658, -1.2560, -1.4134, -1.2650, -1.3255, -1.1715], - [-1.4914, -1.3097, -0.3821, -0.3898, -0.5748, -0.9040, -1.0755, -1.0575, -1.2205, -1.0572], - [0.0197, -0.0582, 0.9147, 1.1512, 1.1651, 0.6628, -0.1010, -0.3085, -0.2285, 0.2650], - [1.1780, 0.1803, 0.7251, 1.5728, 1.6678, 0.4542, -0.1572, -0.1787, 0.0744, 0.8168], - [-0.2078, -0.3211, 1.1096, 1.5085, 1.4632, 0.6299, -0.0515, 0.0589, 0.8609, 1.4429], - [0.7831, -0.2663, 1.0352, 1.4489, 0.9088, 0.0247, -0.3995, 0.0078, 1.2446, 1.6998], - ], - device=torch_device, + expectations = Expectations( + { + (None, None): [ + [-1.2426, -1.7286, -1.6754, -1.7451, -1.6402, -1.5219, -1.4480, -1.3345, -1.4031, -1.4497], + [-0.7858, -1.4966, -1.3602, -1.4876, -1.2949, -1.0723, -1.0021, -0.7553, -0.6521, -0.6929], + [-0.7298, -1.3908, -1.0369, -1.2656, -1.0342, -0.7883, -0.7420, -0.5249, -0.3734, -0.3977], + [-0.4784, -1.3508, -1.1558, -1.4678, -1.2820, -1.0252, -1.0868, -0.9006, -0.8947, -0.8448], + [-0.3963, -1.2895, -1.2813, -1.6147, -1.4658, -1.2560, -1.4134, -1.2650, -1.3255, -1.1715], + [-1.4914, -1.3097, -0.3821, -0.3898, -0.5748, -0.9040, -1.0755, -1.0575, -1.2205, -1.0572], + [0.0197, -0.0582, 0.9147, 1.1512, 1.1651, 0.6628, -0.1010, -0.3085, -0.2285, 0.2650], + [1.1780, 0.1803, 0.7251, 1.5728, 1.6678, 0.4542, -0.1572, -0.1787, 0.0744, 0.8168], + [-0.2078, -0.3211, 1.1096, 1.5085, 1.4632, 0.6299, -0.0515, 0.0589, 0.8609, 1.4429], + [0.7831, -0.2663, 1.0352, 1.4489, 0.9088, 0.0247, -0.3995, 0.0078, 1.2446, 1.6998], + ], + ("cuda", 8): [ + [-1.2425, -1.7282, -1.6750, -1.7448, -1.6400, -1.5217, -1.4478, -1.3341, -1.4026, -1.4493], + [-0.7858, -1.4967, -1.3601, -1.4875, -1.2950, -1.0725, -1.0021, -0.7553, -0.6522, -0.6929], + [-0.7303, -1.3911, -1.0370, -1.2656, -1.0345, -0.7888, -0.7423, -0.5251, -0.3737, -0.3979], + [-0.4784, -1.3506, -1.1556, -1.4677, -1.2820, -1.0253, -1.0868, -0.9006, -0.8949, -0.8448], + [-0.3968, -1.2896, -1.2811, -1.6145, -1.4660, -1.2564, -1.4135, -1.2652, -1.3258, -1.1716], + [-1.4912, -1.3092, -0.3812, -0.3886, -0.5737, -0.9034, -1.0749, -1.0571, -1.2202, -1.0567], + [0.0200, -0.0577, 0.9151, 1.1516, 1.1656, 0.6628, -0.1012, -0.3086, -0.2283, 0.2658], + [1.1778, 0.1805, 0.7255, 1.5732, 1.6680, 0.4539, -0.1572, -0.1785, 0.0751, 0.8175], + [-0.2088, -0.3212, 1.1101, 1.5085, 1.4625, 0.6293, -0.0522, 0.0587, 0.8615, 1.4432], + [0.7834, -0.2659, 1.0355, 1.4486, 0.9080, 0.0244, -0.3995, 0.0083, 1.2452, 1.6998], + ], + } ) + expected_mel_spectrogram = torch.tensor(expectations.get_expectation()).to(torch_device) # fmt: on - torch.testing.assert_close(spectrogram[0, :10, :10], expected_mel_spectrogram, rtol=1e-4, atol=1e-4) + torch.testing.assert_close(spectrogram[0, :10, :10], expected_mel_spectrogram, rtol=2e-4, atol=2e-4) self.assertEqual(spectrogram.shape, (1, 205, model.config.num_mel_bins)) def test_training_integration(self): diff --git a/tests/models/focalnet/test_modeling_focalnet.py b/tests/models/focalnet/test_modeling_focalnet.py index d272f25891..893d9ed1ee 100644 --- a/tests/models/focalnet/test_modeling_focalnet.py +++ b/tests/models/focalnet/test_modeling_focalnet.py @@ -17,7 +17,7 @@ import collections import unittest from transformers import FocalNetConfig -from transformers.testing_utils import require_torch, require_vision, slow, torch_device +from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device from transformers.utils import cached_property, is_torch_available, is_vision_available from ...test_backbone_common import BackboneTesterMixin @@ -425,8 +425,16 @@ class FocalNetModelIntegrationTest(unittest.TestCase): # verify the logits expected_shape = torch.Size((1, 1000)) self.assertEqual(outputs.logits.shape, expected_shape) - expected_slice = torch.tensor([0.2166, -0.4368, 0.2191]).to(torch_device) - torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4) + + expectations = Expectations( + { + (None, None): [0.2166, -0.4368, 0.2191], + ("cuda", 8): [0.2168, -0.4367, 0.2190], + } + ) + expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device) + + torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4) self.assertTrue(outputs.logits.argmax(dim=-1).item(), 281) diff --git a/tests/models/glpn/test_modeling_glpn.py b/tests/models/glpn/test_modeling_glpn.py index b3e1852373..b98743de35 100644 --- a/tests/models/glpn/test_modeling_glpn.py +++ b/tests/models/glpn/test_modeling_glpn.py @@ -164,6 +164,9 @@ class GLPNModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) + def test_batching_equivalence(self, atol=3e-4, rtol=3e-4): + super().test_batching_equivalence(atol=atol, rtol=rtol) + def test_for_depth_estimation(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_depth_estimation(*config_and_inputs) diff --git a/tests/models/hiera/test_modeling_hiera.py b/tests/models/hiera/test_modeling_hiera.py index dfbec4a4b8..1e3ed8e795 100644 --- a/tests/models/hiera/test_modeling_hiera.py +++ b/tests/models/hiera/test_modeling_hiera.py @@ -262,6 +262,9 @@ class HieraModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): self.config_tester.check_config_can_be_init_without_params() self.config_tester.check_config_arguments_init() + def test_batching_equivalence(self, atol=3e-4, rtol=3e-4): + super().test_batching_equivalence(atol=atol, rtol=rtol) + # Overriding as Hiera `get_input_embeddings` returns HieraPatchEmbeddings def test_model_get_set_embeddings(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/models/levit/test_modeling_levit.py b/tests/models/levit/test_modeling_levit.py index f6226be1f8..80f8f822c5 100644 --- a/tests/models/levit/test_modeling_levit.py +++ b/tests/models/levit/test_modeling_levit.py @@ -19,7 +19,7 @@ from math import ceil, floor from transformers import LevitConfig from transformers.file_utils import cached_property, is_torch_available, is_vision_available -from transformers.testing_utils import require_torch, require_vision, slow, torch_device +from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device from ...test_configuration_common import ConfigTester from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor @@ -406,6 +406,11 @@ class LevitModelIntegrationTest(unittest.TestCase): expected_shape = torch.Size((1, 1000)) self.assertEqual(outputs.logits.shape, expected_shape) - expected_slice = torch.tensor([1.0448, -0.3745, -1.8317]).to(torch_device) - - torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4) + expectations = Expectations( + { + (None, None): [1.0448, -0.3745, -1.8317], + ("cuda", 8): [1.0453, -0.3739, -1.8314], + } + ) + expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device) + torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4) diff --git a/tests/models/lightglue/test_modeling_lightglue.py b/tests/models/lightglue/test_modeling_lightglue.py index 20d9f2ef61..7f36469bf5 100644 --- a/tests/models/lightglue/test_modeling_lightglue.py +++ b/tests/models/lightglue/test_modeling_lightglue.py @@ -17,7 +17,7 @@ import unittest from datasets import load_dataset from transformers.models.lightglue.configuration_lightglue import LightGlueConfig -from transformers.testing_utils import require_torch, require_vision, slow, torch_device +from transformers.testing_utils import get_device_properties, require_torch, require_vision, slow, torch_device from transformers.utils import cached_property, is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -143,6 +143,13 @@ class LightGlueModelTest(ModelTesterMixin, unittest.TestCase): self.config_tester.check_config_can_be_init_without_params() self.config_tester.check_config_arguments_init() + def test_batching_equivalence(self, atol=1e-5, rtol=1e-5): + device_properties = get_device_properties() + if device_properties[0] == "cuda" and device_properties[1] == 8: + # TODO: (ydshieh) fix this + self.skipTest(reason="After switching to A10, this test always fails, but pass on CPU or T4.") + super().test_batching_equivalence(atol=atol, rtol=rtol) + @unittest.skip(reason="LightGlueForKeypointMatching does not use inputs_embeds") def test_inputs_embeds(self): pass diff --git a/tests/models/mgp_str/test_modeling_mgp_str.py b/tests/models/mgp_str/test_modeling_mgp_str.py index 586e9f0bc4..1ff9927f89 100644 --- a/tests/models/mgp_str/test_modeling_mgp_str.py +++ b/tests/models/mgp_str/test_modeling_mgp_str.py @@ -140,6 +140,9 @@ class MgpstrModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) + def test_batching_equivalence(self, atol=1e-4, rtol=1e-4): + super().test_batching_equivalence(atol=atol, rtol=rtol) + @unittest.skip(reason="MgpstrModel does not use inputs_embeds") def test_inputs_embeds(self): pass diff --git a/tests/models/minimax/test_modeling_minimax.py b/tests/models/minimax/test_modeling_minimax.py index b9ae9d4515..0e36c7219b 100644 --- a/tests/models/minimax/test_modeling_minimax.py +++ b/tests/models/minimax/test_modeling_minimax.py @@ -20,6 +20,7 @@ import pytest from transformers import MiniMaxConfig, is_torch_available from transformers.cache_utils import Cache from transformers.testing_utils import ( + Expectations, require_flash_attn, require_torch, require_torch_accelerator, @@ -250,15 +251,20 @@ class MiniMaxIntegrationTest(unittest.TestCase): model_id, torch_dtype=torch.bfloat16, ).to(torch_device) - expected_slice = torch.tensor( - [[1.0312, -0.5156, -0.3262], [-0.1152, 0.4336, 0.2412], [1.2188, -0.5898, -0.0381]] - ).to(torch_device) with torch.no_grad(): logits = model(dummy_input).logits logits = logits.float() + expectations = Expectations( + { + (None, None): [[1.0312, -0.5156, -0.3262], [-0.1152, 0.4336, 0.2412], [1.2188, -0.5898, -0.0381]], + ("cuda", 8): [[1.0312, -0.5156, -0.3203], [-0.1201, 0.4375, 0.2402], [1.2188, -0.5898, -0.0396]], + } + ) + expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device) + torch.testing.assert_close(logits[0, :3, :3], expected_slice, atol=1e-3, rtol=1e-3) torch.testing.assert_close(logits[1, :3, :3], expected_slice, atol=1e-3, rtol=1e-3) diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py index 94ceb0e4a7..56c71fe735 100644 --- a/tests/models/mixtral/test_modeling_mixtral.py +++ b/tests/models/mixtral/test_modeling_mixtral.py @@ -191,27 +191,26 @@ class MixtralIntegrationTest(unittest.TestCase): # ("cuda", 8) for A100/A10, and ("cuda", 7) for T4. # # considering differences in hardware processing and potential deviations in generated text. - # fmt: off + EXPECTED_LOGITS_LEFT_UNPADDED = Expectations( { - ("xpu", 3): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7070, 0.2461]]).to(torch_device), - ("cuda", 7): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2275, 0.6054], [0.2656, -0.7070, 0.2460]]).to(torch_device), - ("cuda", 8): torch.Tensor([[0.2207, 0.5234, -0.3828], [0.8203, -0.2285, 0.6055], [0.2656, -0.7109, 0.2451]]).to(torch_device), - ("rocm", 9): torch.Tensor([[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]]).to(torch_device), + ("xpu", 3): [[0.2236, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7070, 0.2461]], + ("cuda", 7): [[0.2236, 0.5195, -0.3828], [0.8203, -0.2275, 0.6054], [0.2656, -0.7070, 0.2460]], + ("cuda", 8): [[0.2217, 0.5195, -0.3828], [0.8203, -0.2295, 0.6055], [0.2676, -0.7109, 0.2461]], + ("rocm", 9): [[0.2236, 0.5195, -0.3828], [0.8203, -0.2285, 0.6055], [0.2637, -0.7109, 0.2451]], } ) - expected_left_unpadded = EXPECTED_LOGITS_LEFT_UNPADDED.get_expectation() + expected_left_unpadded = torch.tensor(EXPECTED_LOGITS_LEFT_UNPADDED.get_expectation(), device=torch_device) EXPECTED_LOGITS_RIGHT_UNPADDED = Expectations( { - ("xpu", 3): torch.Tensor([[0.2178, 0.1270, -0.1641], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]]).to(torch_device), - ("cuda", 7): torch.Tensor([[0.2167, 0.1269, -0.1640], [-0.3496, 0.2988, -1.0312], [0.0688, 0.7929, 0.8007]]).to(torch_device), - ("cuda", 8): torch.Tensor([[0.2178, 0.1270, -0.1621], [-0.3496, 0.3008, -1.0312], [0.0693, 0.7930, 0.7969]]).to(torch_device), - ("rocm", 9): torch.Tensor([[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]]).to(torch_device), + ("xpu", 3): [[0.2178, 0.1270, -0.1641], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]], + ("cuda", 7): [[0.2167, 0.1269, -0.1640], [-0.3496, 0.2988, -1.0312], [0.0688, 0.7929, 0.8007]], + ("cuda", 8): [[0.2178, 0.1260, -0.1621], [-0.3496, 0.2988, -1.0312], [0.0693, 0.7930, 0.8008]], + ("rocm", 9): [[0.2197, 0.1250, -0.1611], [-0.3516, 0.3008, -1.0312], [0.0684, 0.7930, 0.8008]], } ) - expected_right_unpadded = EXPECTED_LOGITS_RIGHT_UNPADDED.get_expectation() - # fmt: on + expected_right_unpadded = torch.tensor(EXPECTED_LOGITS_RIGHT_UNPADDED.get_expectation(), device=torch_device) with torch.no_grad(): logits = model(dummy_input, attention_mask=attention_mask).logits diff --git a/tests/models/moonshine/test_modeling_moonshine.py b/tests/models/moonshine/test_modeling_moonshine.py index 99573cff09..a551244a6e 100644 --- a/tests/models/moonshine/test_modeling_moonshine.py +++ b/tests/models/moonshine/test_modeling_moonshine.py @@ -17,7 +17,7 @@ import copy import unittest from transformers import MoonshineConfig, is_torch_available -from transformers.testing_utils import cleanup, require_torch, slow, torch_device +from transformers.testing_utils import Expectations, cleanup, require_torch, slow, torch_device from ...test_configuration_common import ConfigTester from ...test_modeling_common import ( @@ -457,13 +457,15 @@ class MoonshineModelIntegrationTests(unittest.TestCase): outputs = model.generate(**inputs, max_new_tokens=1, return_dict_in_generate=True, output_logits=True) # fmt: off - EXPECTED_LOGITS = torch.tensor([ - -9.1106, 4.5542, 6.3892, -6.8139, -7.2456, -7.9074, -7.2839, -7.6043, -8.0384, -7.8351, - -7.3867, -7.2450, -7.7420, -7.3912, -7.3866, -7.6979, -7.6420, -7.0504, -7.3979, -7.2483, - -8.0796, -7.3300, -7.3672, -6.8765, -7.6876, -7.2682, -6.9866, -6.7457, -7.6855, -7.3050, - ]) + expectations = Expectations( + { + (None, None): [-9.1106, 4.5542, 6.3892, -6.8139, -7.2456, -7.9074, -7.2839, -7.6043, -8.0384, -7.8351, -7.3867, -7.2450, -7.7420, -7.3912, -7.3866, -7.6979, -7.6420, -7.0504, -7.3979, -7.2483, -8.0796, -7.3300, -7.3672, -6.8765, -7.6876, -7.2682, -6.9866, -6.7457, -7.6855, -7.3050], + ("cuda", 8): [-9.1107, 4.5538, 6.3902, -6.8141, -7.2459, -7.9076, -7.2842, -7.6045, -8.0387, -7.8354, -7.3869, -7.2453, -7.7423, -7.3914, -7.3869, -7.6982, -7.6422, -7.0507, -7.3982, -7.2486, -8.0798, -7.3302, -7.3675, -6.8769, -7.6878, -7.2684, -6.9868, -6.7459, -7.6858, -7.3052], + } + ) + EXPECTED_LOGITS = torch.tensor(expectations.get_expectation()).to(torch_device) # fmt: on - torch.testing.assert_close(outputs.logits[0][0, :30].cpu(), EXPECTED_LOGITS, rtol=1e-4, atol=1e-4) + torch.testing.assert_close(outputs.logits[0][0, :30], EXPECTED_LOGITS, rtol=2e-4, atol=2e-4) @slow def test_base_logits_single(self): @@ -476,7 +478,7 @@ class MoonshineModelIntegrationTests(unittest.TestCase): # fmt: off EXPECTED_LOGITS = torch.tensor([ - -6.7336, 1.9482, 5.2448, -8.0277, -7.9167, -7.8956, -7.9649, -7.9348, -8.1312, -8.0616, + -6.7336, 1.9482, 5.2448, -8.0277, -7.9167, -7.8956, -7.9649, -7.9348, -8.1312, -8.0616, -8.1070, -7.7696, -7.8809, -7.9450, -8.1013, -7.8177, -7.8598, -7.8257, -7.8729, -7.9657, -7.9310, -8.1024, -7.8699, -7.8231, -8.0752, -7.9764, -7.8127, -8.0536, -7.9492, -7.9290, ]) @@ -493,9 +495,9 @@ class MoonshineModelIntegrationTests(unittest.TestCase): outputs = model.generate(**inputs, max_new_tokens=1, return_dict_in_generate=True, output_logits=True) # fmt: off EXPECTED_LOGITS = torch.tensor([ - [-8.0109, 5.0241, 4.5979, -6.8125, -7.1675, -7.8783, -7.2152, -7.5188, -7.9077, -7.7394], - [-4.4399, -1.4422, 6.6710, -6.8929, -7.3751, -7.0969, -6.5257, -7.0257, -7.2585, -7.0008], - [-10.0086, 3.2859, 0.7345, -6.5557, -6.8514, -6.5308, -6.4172, -6.9484, -6.6214, -6.6229], + [-8.0109, 5.0241, 4.5979, -6.8125, -7.1675, -7.8783, -7.2152, -7.5188, -7.9077, -7.7394], + [-4.4399, -1.4422, 6.6710, -6.8929, -7.3751, -7.0969, -6.5257, -7.0257, -7.2585, -7.0008], + [-10.0086, 3.2859, 0.7345, -6.5557, -6.8514, -6.5308, -6.4172, -6.9484, -6.6214, -6.6229], [-10.8078, 4.0030, -0.0633, -5.0505, -5.3906, -5.4590, -5.2420, -5.4746, -5.2665, -5.3158] ]) # fmt: on @@ -512,10 +514,10 @@ class MoonshineModelIntegrationTests(unittest.TestCase): # fmt: off EXPECTED_LOGITS = torch.tensor([ - [-7.7272, 1.4630, 5.2294, -7.7313, -7.6252, -7.6011, -7.6788, -7.6441, -7.8452, -7.7549], - [-6.2173, -0.5891, 7.9493, -7.0694, -6.9997, -6.9982, -7.0953, -7.0831, -7.1686, -7.0137], - [-7.3184, 3.1192, 3.8937, -5.7206, -5.8428, -5.7609, -5.9996, -5.8212, -5.8615, -5.8719], - [-9.5475, 1.0146, 4.1179, -5.9971, -6.0614, -6.0329, -6.2103, -6.0318, -6.0789, -6.0873] + [-7.7272, 1.4630, 5.2294, -7.7313, -7.6252, -7.6011, -7.6788, -7.6441, -7.8452, -7.7549], + [-6.2173, -0.5891, 7.9493, -7.0694, -6.9997, -6.9982, -7.0953, -7.0831, -7.1686, -7.0137], + [-7.3184, 3.1192, 3.8937, -5.7206, -5.8428, -5.7609, -5.9996, -5.8212, -5.8615, -5.8719], + [-9.5475, 1.0146, 4.1179, -5.9971, -6.0614, -6.0329, -6.2103, -6.0318, -6.0789, -6.0873] ]) # fmt: on diff --git a/tests/models/mpt/test_modeling_mpt.py b/tests/models/mpt/test_modeling_mpt.py index 449ddfbc2a..15d8fddb9f 100644 --- a/tests/models/mpt/test_modeling_mpt.py +++ b/tests/models/mpt/test_modeling_mpt.py @@ -446,7 +446,8 @@ class MptIntegrationTests(unittest.TestCase): input_text = "Hello" expected_outputs = Expectations({ - ("cuda", None): "Hello, I'm a new user of the forum. I have a question about the \"Solaris", + (None, None): "Hello, I'm a new user of the forum. I have a question about the \"Solaris", + ("cuda", 8): "Hello, I'm a new user of the forum. I have a question. I have a problem with", ("rocm", (9, 5)): "Hello, I'm a newbie to the forum. I have a question about the \"B\" in", }) # fmt: off expected_output = expected_outputs.get_expectation() @@ -468,10 +469,10 @@ class MptIntegrationTests(unittest.TestCase): input_text = "Hello" expected_outputs = Expectations({ + (None, None): "Hello and welcome to the first episode of the new podcast, The Frugal Feminist.\n", ("rocm", (9, 5)): "Hello and welcome to the first day of the new release at The Stamp Man!\nToday we are", ("xpu", 3): "Hello and welcome to the first ever episode of the new and improved, and hopefully improved, podcast.\n", - ("cuda", 7): "Hello and welcome to the first episode of the new podcast, The Frugal Feminist.\n", - ("cuda", 8): "Hello and welcome to the first day of the new release countdown for the month of May!\nToday", + ("cuda", 8): "Hello and welcome to the first ever episode of the new and improved, and hopefully improved, podcast.\n", }) # fmt: off expected_output = expected_outputs.get_expectation() @@ -499,13 +500,17 @@ class MptIntegrationTests(unittest.TestCase): expected_outputs = Expectations( { + (None, None): [ + "Hello my name is Tiffany and I am a mother of two beautiful children. I have been a nanny for the", + "Today I am going at the gym and then I am going to go to the grocery store. I am going to buy some food and some", + ], ("xpu", 3): [ "Hello my name is Tiffany. I am a mother of two beautiful children. I have been a nanny for over", "Today I am going at the gym and then I am going to go to the mall with my mom. I am going to go to the", ], - ("cuda", 7): [ - "Hello my name is Tiffany and I am a mother of two beautiful children. I have been a nanny for the", - "Today I am going at the gym and then I am going to go to the grocery store. I am going to buy some food and some", + ("cuda", 8): [ + "Hello my name is Tiffany and I am a mother of two beautiful children. I have been a nanny for over", + "Today I am going at the gym and then I am going to go to the grocery store. I am going to make a list of things", ], ("rocm", (9, 5)): [ "Hello my name is Jasmine and I am a very sweet and loving dog. I am a very playful dog and I", @@ -534,8 +539,9 @@ class MptIntegrationTests(unittest.TestCase): expected_slices = Expectations( { + (None, None): torch.Tensor([-0.2520, -0.2178, -0.1953]), ("xpu", 3): torch.Tensor([-0.2090, -0.2061, -0.1465]), - ("cuda", 7): torch.Tensor([-0.2520, -0.2178, -0.1953]), + ("cuda", 8): torch.Tensor([-0.2559, -0.2227, -0.2217]), # TODO: This is quite a bit off, check BnB ("rocm", (9, 5)): torch.Tensor([-0.3008, -0.1309, -0.1562]), } diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py index 3de8b482d7..9356ddf92e 100644 --- a/tests/models/musicgen/test_modeling_musicgen.py +++ b/tests/models/musicgen/test_modeling_musicgen.py @@ -31,6 +31,7 @@ from transformers import ( T5Config, ) from transformers.testing_utils import ( + Expectations, get_device_properties, is_torch_available, require_flash_attn, @@ -1377,16 +1378,17 @@ class MusicgenIntegrationTests(unittest.TestCase): output_values = model.generate(**unconditional_inputs, do_sample=True, max_new_tokens=10) # fmt: off - EXPECTED_VALUES = torch.tensor( - [ - -0.0099, -0.0140, 0.0079, 0.0080, -0.0046, 0.0065, -0.0068, -0.0185, - 0.0105, 0.0059, 0.0329, 0.0249, -0.0204, -0.0341, -0.0465, 0.0053, - ] + expectations = Expectations( + { + (None, None): [-0.0099, -0.0140, 0.0079, 0.0080, -0.0046, 0.0065, -0.0068, -0.0185, 0.0105, 0.0059, 0.0329, 0.0249, -0.0204, -0.0341, -0.0465, 0.0053], + ("cuda", 8): [-0.0099, -0.0140, 0.0079, 0.0080, -0.0046, 0.0065, -0.0068, -0.0185, 0.0105, 0.0058, 0.0328, 0.0249, -0.0205, -0.0342, -0.0466, 0.0052], + } ) + EXPECTED_VALUES = torch.tensor(expectations.get_expectation()).to(torch_device) # fmt: on self.assertTrue(output_values.shape == (2, 1, 4480)) - torch.testing.assert_close(output_values[0, 0, :16].cpu(), EXPECTED_VALUES, rtol=1e-4, atol=1e-4) + torch.testing.assert_close(output_values[0, 0, :16], EXPECTED_VALUES, rtol=2e-4, atol=2e-4) @slow def test_generate_text_prompt_greedy(self): @@ -1459,16 +1461,17 @@ class MusicgenIntegrationTests(unittest.TestCase): ) # fmt: off - EXPECTED_VALUES = torch.tensor( - [ - -0.0111, -0.0154, 0.0047, 0.0058, -0.0068, 0.0012, -0.0109, -0.0229, - 0.0010, -0.0038, 0.0167, 0.0042, -0.0421, -0.0610, -0.0764, -0.0326, - ] + expectations = Expectations( + { + (None, None): [-0.0111, -0.0154, 0.0047, 0.0058, -0.0068, 0.0012, -0.0109, -0.0229, 0.0010, -0.0038, 0.0167, 0.0042, -0.0421, -0.0610, -0.0764, -0.0326], + ("cuda", 8): [-0.0110, -0.0153, 0.0048, 0.0058, -0.0068, 0.0012, -0.0109, -0.0229, 0.0010, -0.0037, 0.0168, 0.0042, -0.0420, -0.0609, -0.0763, -0.0326], + } ) + EXPECTED_VALUES = torch.tensor(expectations.get_expectation()).to(torch_device) # fmt: on self.assertTrue(output_values.shape == (2, 1, 4480)) - torch.testing.assert_close(output_values[0, 0, :16].cpu(), EXPECTED_VALUES, rtol=1e-4, atol=1e-4) + torch.testing.assert_close(output_values[0, 0, :16], EXPECTED_VALUES, rtol=2e-4, atol=2e-4) @slow def test_generate_text_audio_prompt(self): @@ -1521,13 +1524,13 @@ class MusicgenStereoIntegrationTests(unittest.TestCase): # fmt: off EXPECTED_VALUES_LEFT = torch.tensor( [ - 0.0017, 0.0004, 0.0004, 0.0005, 0.0002, 0.0002, -0.0002, -0.0013, + 0.0017, 0.0004, 0.0004, 0.0005, 0.0002, 0.0002, -0.0002, -0.0013, -0.0010, -0.0015, -0.0018, -0.0032, -0.0060, -0.0082, -0.0096, -0.0099, ] ) EXPECTED_VALUES_RIGHT = torch.tensor( [ - 0.0038, 0.0028, 0.0031, 0.0032, 0.0031, 0.0032, 0.0030, 0.0019, + 0.0038, 0.0028, 0.0031, 0.0032, 0.0031, 0.0032, 0.0030, 0.0019, 0.0021, 0.0015, 0.0009, -0.0008, -0.0040, -0.0067, -0.0087, -0.0096, ] ) @@ -1555,13 +1558,13 @@ class MusicgenStereoIntegrationTests(unittest.TestCase): # fmt: off EXPECTED_VALUES_LEFT = torch.tensor( [ - 0.2535, 0.2008, 0.1471, 0.0896, 0.0306, -0.0200, -0.0501, -0.0728, + 0.2535, 0.2008, 0.1471, 0.0896, 0.0306, -0.0200, -0.0501, -0.0728, -0.0832, -0.0856, -0.0867, -0.0884, -0.0864, -0.0866, -0.0744, -0.0430, ] ) EXPECTED_VALUES_RIGHT = torch.tensor( [ - 0.1695, 0.1213, 0.0732, 0.0239, -0.0264, -0.0705, -0.0935, -0.1103, + 0.1695, 0.1213, 0.0732, 0.0239, -0.0264, -0.0705, -0.0935, -0.1103, -0.1163, -0.1139, -0.1104, -0.1082, -0.1027, -0.1004, -0.0900, -0.0614, ] ) diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py index eef833750b..4aa812a0ae 100644 --- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py +++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py @@ -30,6 +30,7 @@ from transformers import ( T5Config, ) from transformers.testing_utils import ( + Expectations, get_device_properties, is_torch_available, is_torchaudio_available, @@ -1472,16 +1473,17 @@ class MusicgenMelodyIntegrationTests(unittest.TestCase): ) # fmt: off - EXPECTED_VALUES = torch.tensor( - [ - -0.0165, -0.0222, -0.0041, -0.0058, -0.0145, -0.0023, -0.0160, -0.0310, - -0.0055, -0.0127, 0.0104, 0.0105, -0.0326, -0.0611, -0.0744, -0.0083 - ] + expectations = Expectations( + { + (None, None): [-0.0165, -0.0222, -0.0041, -0.0058, -0.0145, -0.0023, -0.0160, -0.0310, -0.0055, -0.0127, 0.0104, 0.0105, -0.0326, -0.0611, -0.0744, -0.0083], + ("cuda", 8): [-0.0165, -0.0221, -0.0040, -0.0058, -0.0145, -0.0024, -0.0160, -0.0310, -0.0055, -0.0127, 0.0104, 0.0105, -0.0326, -0.0612, -0.0744, -0.0082], + } ) + EXPECTED_VALUES = torch.tensor(expectations.get_expectation()).to(torch_device) # fmt: on self.assertTrue(output_values.shape == (2, 1, 4480)) - torch.testing.assert_close(output_values[0, 0, :16].cpu(), EXPECTED_VALUES, rtol=1e-4, atol=1e-4) + torch.testing.assert_close(output_values[0, 0, :16], EXPECTED_VALUES, rtol=2e-4, atol=2e-4) @slow def test_generate_text_audio_prompt(self): diff --git a/tests/models/sam/test_modeling_sam.py b/tests/models/sam/test_modeling_sam.py index fa1ada4f61..660d529dc9 100644 --- a/tests/models/sam/test_modeling_sam.py +++ b/tests/models/sam/test_modeling_sam.py @@ -19,7 +19,7 @@ import unittest import requests from transformers import SamConfig, SamMaskDecoderConfig, SamPromptEncoderConfig, SamVisionConfig, pipeline -from transformers.testing_utils import cleanup, require_torch, require_torch_sdpa, slow, torch_device +from transformers.testing_utils import Expectations, cleanup, require_torch, require_torch_sdpa, slow, torch_device from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -771,9 +771,18 @@ class SamModelIntegrationTest(unittest.TestCase): with torch.no_grad(): outputs = model(**inputs) scores = outputs.iou_scores.squeeze().cpu() - masks = outputs.pred_masks[0, 0, 0, 0, :3].cpu() + masks = outputs.pred_masks[0, 0, 0, 0, :3] + + expectations = Expectations( + { + (None, None): [-12.7729, -12.3665, -12.6061], + ("cuda", 8): [-12.7657, -12.3683, -12.5983], + } + ) + expected_masks = torch.tensor(expectations.get_expectation()).to(torch_device) + torch.testing.assert_close(scores[-1], torch.tensor(0.9566), rtol=2e-4, atol=2e-4) - torch.testing.assert_close(masks, torch.tensor([-12.7729, -12.3665, -12.6061]), rtol=2e-4, atol=2e-4) + torch.testing.assert_close(masks, expected_masks, rtol=2e-4, atol=2e-4) def test_inference_mask_generation_batched_points_batched_images(self): model = SamModel.from_pretrained("facebook/sam-vit-base") diff --git a/tests/models/sam_hq/test_modeling_sam_hq.py b/tests/models/sam_hq/test_modeling_sam_hq.py index 830b537031..b4701fa975 100644 --- a/tests/models/sam_hq/test_modeling_sam_hq.py +++ b/tests/models/sam_hq/test_modeling_sam_hq.py @@ -27,7 +27,7 @@ from transformers import ( SamHQVisionModel, pipeline, ) -from transformers.testing_utils import cleanup, require_torch, require_torch_sdpa, slow, torch_device +from transformers.testing_utils import Expectations, cleanup, require_torch, require_torch_sdpa, slow, torch_device from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -802,9 +802,15 @@ class SamHQModelIntegrationTest(unittest.TestCase): masks = outputs.pred_masks[0, 0, 0, 0, :3] self.assertTrue(torch.allclose(scores[0][0][-1], torch.tensor(0.4482), atol=2e-4)) - self.assertTrue( - torch.allclose(masks, torch.tensor([-13.1695, -14.6201, -14.8989]).to(torch_device), atol=2e-3) + + expectations = Expectations( + { + (None, None): [-13.1695, -14.6201, -14.8989], + ("cuda", 8): [-13.1668, -14.6182, -14.8970], + } ) + EXPECTED_MASKS = torch.tensor(expectations.get_expectation()).to(torch_device) + torch.testing.assert_close(masks, EXPECTED_MASKS, atol=2e-3, rtol=2e-3) def test_inference_mask_generation_one_point_one_bb(self): model = SamHQModel.from_pretrained("syscv-community/sam-hq-vit-base") @@ -849,28 +855,53 @@ class SamHQModelIntegrationTest(unittest.TestCase): with torch.no_grad(): outputs = model(**inputs) - scores = outputs.iou_scores.squeeze().cpu() - masks = outputs.pred_masks[0, 0, 0, 0, :3].cpu() - EXPECTED_SCORES = torch.tensor( - [ - [ - [0.9195, 0.8316, 0.6614], - [0.9195, 0.8316, 0.6614], - [0.9195, 0.8316, 0.6614], - [0.9195, 0.8316, 0.6614], - ], - [ - [0.7598, 0.7388, 0.3110], - [0.9195, 0.8317, 0.6614], - [0.9195, 0.8317, 0.6614], - [0.9195, 0.8317, 0.6614], - ], - ] - ) - EXPECTED_MASKS = torch.tensor([-40.2445, -37.4300, -38.1577]) + scores = outputs.iou_scores.squeeze() + masks = outputs.pred_masks[0, 0, 0, 0, :3] - self.assertTrue(torch.allclose(scores, EXPECTED_SCORES, atol=1e-3)) - self.assertTrue(torch.allclose(masks, EXPECTED_MASKS, atol=9e-3)) + expectations = Expectations( + { + (None, None): [ + [ + [0.9195, 0.8316, 0.6614], + [0.9195, 0.8316, 0.6614], + [0.9195, 0.8316, 0.6614], + [0.9195, 0.8316, 0.6614], + ], + [ + [0.7598, 0.7388, 0.3110], + [0.9195, 0.8317, 0.6614], + [0.9195, 0.8317, 0.6614], + [0.9195, 0.8317, 0.6614], + ], + ], + ("cuda", 8): [ + [ + [0.9195, 0.8316, 0.6614], + [0.9195, 0.8316, 0.6614], + [0.9195, 0.8316, 0.6614], + [0.9195, 0.8316, 0.6614], + ], + [ + [0.7597, 0.7387, 0.3110], + [0.9195, 0.8316, 0.6614], + [0.9195, 0.8316, 0.6614], + [0.9195, 0.8316, 0.6614], + ], + ], + } + ) + EXPECTED_SCORES = torch.tensor(expectations.get_expectation()).to(torch_device) + + expectations = Expectations( + { + (None, None): [-40.2445, -37.4300, -38.1577], + ("cuda", 8): [-40.2351, -37.4334, -38.1526], + } + ) + EXPECTED_MASKS = torch.tensor(expectations.get_expectation()).to(torch_device) + + torch.testing.assert_close(scores, EXPECTED_SCORES, atol=1e-3, rtol=1e-3) + torch.testing.assert_close(masks, EXPECTED_MASKS, atol=9e-3, rtol=9e-3) def test_inference_mask_generation_one_point_one_bb_zero(self): model = SamHQModel.from_pretrained("syscv-community/sam-hq-vit-base") diff --git a/tests/models/timesformer/test_modeling_timesformer.py b/tests/models/timesformer/test_modeling_timesformer.py index 247d0a5fba..5b74a8507a 100644 --- a/tests/models/timesformer/test_modeling_timesformer.py +++ b/tests/models/timesformer/test_modeling_timesformer.py @@ -21,7 +21,7 @@ from huggingface_hub import hf_hub_download from transformers import TimesformerConfig from transformers.models.auto import get_values -from transformers.testing_utils import require_torch, require_vision, slow, torch_device +from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device from transformers.utils import cached_property, is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -350,6 +350,11 @@ class TimesformerModelIntegrationTest(unittest.TestCase): expected_shape = torch.Size((1, 400)) self.assertEqual(outputs.logits.shape, expected_shape) - expected_slice = torch.tensor([-0.3016, -0.7713, -0.4205]).to(torch_device) - - torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4) + expectations = Expectations( + { + (None, None): [-0.3016, -0.7713, -0.4205], + ("cuda", 8): [-0.3004, -0.7708, -0.4205], + } + ) + expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device) + torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4) diff --git a/tests/models/timm_wrapper/test_modeling_timm_wrapper.py b/tests/models/timm_wrapper/test_modeling_timm_wrapper.py index 3f103309a0..a37f10d381 100644 --- a/tests/models/timm_wrapper/test_modeling_timm_wrapper.py +++ b/tests/models/timm_wrapper/test_modeling_timm_wrapper.py @@ -18,6 +18,7 @@ import unittest from transformers import pipeline from transformers.testing_utils import ( + Expectations, require_bitsandbytes, require_timm, require_torch, @@ -304,10 +305,16 @@ class TimmWrapperModelIntegrationTest(unittest.TestCase): expected_label = 281 # tabby cat self.assertEqual(torch.argmax(outputs.logits).item(), expected_label) - expected_slice = torch.tensor([-11.2618, -9.6192, -10.3205]).to(torch_device) + expectations = Expectations( + { + (None, None): [-11.2618, -9.6192, -10.3205], + ("cuda", 8): [-11.2634, -9.6208, -10.3199], + } + ) + expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device) + resulted_slice = outputs.logits[0, :3] - is_close = torch.allclose(resulted_slice, expected_slice, atol=1e-3) - self.assertTrue(is_close, f"Expected {expected_slice}, but got {resulted_slice}") + torch.testing.assert_close(resulted_slice, expected_slice, atol=1e-3, rtol=1e-3) @slow def test_inference_with_pipeline(self): @@ -349,10 +356,16 @@ class TimmWrapperModelIntegrationTest(unittest.TestCase): expected_label = 281 # tabby cat self.assertEqual(torch.argmax(outputs.logits).item(), expected_label) - expected_slice = torch.tensor([-2.4043, 1.4492, -0.5127]).to(outputs.logits.dtype) - resulted_slice = outputs.logits[0, :3].cpu() - is_close = torch.allclose(resulted_slice, expected_slice, atol=0.1) - self.assertTrue(is_close, f"Expected {expected_slice}, but got {resulted_slice}") + expectations = Expectations( + { + (None, None): [-2.4043, 1.4492, -0.5127], + ("cuda", 8): [-2.2676, 1.5303, -0.4409], + } + ) + expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device) + + resulted_slice = outputs.logits[0, :3].to(dtype=torch.float32) + torch.testing.assert_close(resulted_slice, expected_slice, atol=0.1, rtol=0.1) @slow def test_transformers_model_for_classification_is_equivalent_to_timm(self): diff --git a/tests/models/videomae/test_modeling_videomae.py b/tests/models/videomae/test_modeling_videomae.py index 0b85c31f8a..2c592290a6 100644 --- a/tests/models/videomae/test_modeling_videomae.py +++ b/tests/models/videomae/test_modeling_videomae.py @@ -24,6 +24,7 @@ from pytest import mark from transformers import VideoMAEConfig from transformers.models.auto import get_values from transformers.testing_utils import ( + Expectations, is_flaky, require_flash_attn, require_torch, @@ -442,9 +443,14 @@ class VideoMAEModelIntegrationTest(unittest.TestCase): expected_shape = torch.Size((1, 400)) self.assertEqual(outputs.logits.shape, expected_shape) - expected_slice = torch.tensor([0.3669, -0.0688, -0.2421]).to(torch_device) - - torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=1e-4, atol=1e-4) + expectations = Expectations( + { + (None, None): [0.3669, -0.0688, -0.2421], + ("cuda", 8): [0.3668, -0.0690, -0.2421], + } + ) + expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device) + torch.testing.assert_close(outputs.logits[0, :3], expected_slice, rtol=2e-4, atol=2e-4) @slow def test_inference_for_pretraining(self): diff --git a/tests/models/vitpose/test_modeling_vitpose.py b/tests/models/vitpose/test_modeling_vitpose.py index 6f4ac62132..e9bce2d4c6 100644 --- a/tests/models/vitpose/test_modeling_vitpose.py +++ b/tests/models/vitpose/test_modeling_vitpose.py @@ -169,6 +169,9 @@ class VitPoseModelTest(ModelTesterMixin, unittest.TestCase): self.config_tester.check_config_can_be_init_without_params() self.config_tester.check_config_arguments_init() + def test_batching_equivalence(self, atol=3e-4, rtol=3e-4): + super().test_batching_equivalence(atol=atol, rtol=rtol) + @unittest.skip(reason="VitPose does not support input and output embeddings") def test_model_common_attributes(self): pass diff --git a/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py b/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py index 64ff79a68e..a95d6ca1fa 100644 --- a/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py +++ b/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py @@ -137,6 +137,9 @@ class VitPoseBackboneModelTest(ModelTesterMixin, unittest.TestCase): def test_config(self): self.config_tester.run_common_tests() + def test_batching_equivalence(self, atol=3e-4, rtol=3e-4): + super().test_batching_equivalence(atol=atol, rtol=rtol) + # TODO: @Pavel @unittest.skip(reason="currently failing") def test_initialization(self): diff --git a/tests/models/vivit/test_modeling_vivit.py b/tests/models/vivit/test_modeling_vivit.py index d4d3efe374..f2866febb7 100644 --- a/tests/models/vivit/test_modeling_vivit.py +++ b/tests/models/vivit/test_modeling_vivit.py @@ -22,7 +22,7 @@ from huggingface_hub import hf_hub_download from transformers import VivitConfig from transformers.models.auto import get_values -from transformers.testing_utils import require_torch, require_vision, slow, torch_device +from transformers.testing_utils import Expectations, require_torch, require_vision, slow, torch_device from transformers.utils import cached_property, is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -355,10 +355,14 @@ class VivitModelIntegrationTest(unittest.TestCase): expected_shape = torch.Size((1, 400)) self.assertEqual(outputs.logits.shape, expected_shape) - # taken from original model - expected_slice = torch.tensor([-0.9498, 2.7971, -1.4049, 0.1024, -1.8353]).to(torch_device) - - torch.testing.assert_close(outputs.logits[0, :5], expected_slice, rtol=1e-4, atol=1e-4) + expectations = Expectations( + { + (None, None): [-0.9498, 2.7971, -1.4049, 0.1024, -1.8353], + ("cuda", 8): [-0.9502, 2.7967, -1.4046, 0.1027, -1.8345], + } + ) + expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device) + torch.testing.assert_close(outputs.logits[0, :5], expected_slice, rtol=2e-4, atol=2e-4) @slow def test_inference_interpolate_pos_encoding(self): diff --git a/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py b/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py index 1408e09744..a8e2c4843e 100644 --- a/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py +++ b/tests/models/wav2vec2_bert/test_modeling_wav2vec2_bert.py @@ -20,6 +20,7 @@ from datasets import load_dataset from transformers import Wav2Vec2BertConfig, is_torch_available from transformers.testing_utils import ( + is_flaky, require_torch, require_torch_accelerator, require_torch_fp16, @@ -434,6 +435,10 @@ class Wav2Vec2BertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Test config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) + @is_flaky(description="Get lager difference with A10 and even with the new `5e-4` still flaky") + def test_batching_equivalence(self, atol=5e-4, rtol=5e-4): + super().test_batching_equivalence(atol=atol, rtol=rtol) + def test_model_with_relative(self): config_and_inputs = self.model_tester.prepare_config_and_inputs(position_embeddings_type="relative") self.model_tester.create_and_check_model(*config_and_inputs) diff --git a/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py b/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py index f980582024..430653721d 100644 --- a/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py +++ b/tests/models/wav2vec2_conformer/test_modeling_wav2vec2_conformer.py @@ -428,8 +428,8 @@ class Wav2Vec2ConformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest @is_flaky( description="The `codevector_idx` computed with `argmax()` in `Wav2Vec2ConformerGumbelVectorQuantizer.forward` is not stable." ) - def test_batching_equivalence(self): - super().test_batching_equivalence() + def test_batching_equivalence(self, atol=1e-4, rtol=1e-4): + super().test_batching_equivalence(atol=atol, rtol=rtol) def test_model_with_relative(self): config_and_inputs = self.model_tester.prepare_config_and_inputs(position_embeddings_type="relative") diff --git a/tests/models/x_clip/test_modeling_x_clip.py b/tests/models/x_clip/test_modeling_x_clip.py index 1a0c7dda6e..8c5134fc6d 100644 --- a/tests/models/x_clip/test_modeling_x_clip.py +++ b/tests/models/x_clip/test_modeling_x_clip.py @@ -22,7 +22,14 @@ import numpy as np from huggingface_hub import hf_hub_download from transformers import XCLIPConfig, XCLIPTextConfig, XCLIPVisionConfig -from transformers.testing_utils import require_torch, require_torch_multi_gpu, require_vision, slow, torch_device +from transformers.testing_utils import ( + Expectations, + require_torch, + require_torch_multi_gpu, + require_vision, + slow, + torch_device, +) from transformers.utils import is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester @@ -751,10 +758,13 @@ class XCLIPModelIntegrationTest(unittest.TestCase): self.assertEqual(outputs.vision_model_output.last_hidden_state.shape, expected_shape) - expected_slice = torch.tensor( - [[0.0126, 0.2109, 0.0609], [0.0448, 0.5862, -0.1688], [-0.0881, 0.8525, -0.3044]] - ).to(torch_device) - - torch.testing.assert_close( - outputs.vision_model_output.last_hidden_state[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4 + expectations = Expectations( + { + (None, None): [[0.0126, 0.2109, 0.0609], [0.0448, 0.5862, -0.1688], [-0.0881, 0.8525, -0.3044]], + ("cuda", 8): [[0.0141, 0.2114, 0.0599], [0.0446, 0.5866, -0.1674], [-0.0876, 0.8592, -0.3025]], + } + ) + expected_slice = torch.tensor(expectations.get_expectation()).to(torch_device) + torch.testing.assert_close( + outputs.vision_model_output.last_hidden_state[0, :3, :3], expected_slice, rtol=2e-4, atol=2e-4 )