Remove @slow for test_eager_matches_sdpa_inference (#34558)

* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2024-11-05 16:10:42 +01:00
parent 082e57e0d4
commit f2d5dfbab2
21 changed files with 271 additions and 626 deletions
--- a/tests/models/mllama/test_modeling_mllama.py
+++ b/tests/models/mllama/test_modeling_mllama.py
@@ -30,12 +30,10 @@ from transformers import (
 from transformers.models.mllama.configuration_mllama import MllamaTextConfig
 from transformers.testing_utils import (
    cleanup,
-    is_flaky,
    require_bitsandbytes,
    require_read_token,
    require_torch,
    require_torch_gpu,
-    require_torch_sdpa,
    slow,
    torch_device,
 )
@@ -359,13 +357,6 @@ class MllamaForConditionalGenerationModelTest(ModelTesterMixin, GenerationTester

            self.assertListEqual([layer_attention.shape for layer_attention in iter_attentions], expected_shapes)

-    @require_torch_sdpa
-    @slow
-    @is_flaky()
-    def test_eager_matches_sdpa_inference_1_bfloat16(self):
-        # A workaround to override parametrized test with flaky decorator
-        super().test_eager_matches_sdpa_inference_1_bfloat16()
-
    @unittest.skip("For some unknown reasons the tests fails in CrossAttention layer when doing torch.sdpa(). ")
    def test_sdpa_can_compile_dynamic(self):
        pass