[CI] green llama tests (#37244)

* green llama tests
* use cleanup instead
* better test comment; cleanup upgrade
* better test comment; cleanup upgrade
2025-04-03 14:15:53 +01:00
parent 782d7d945d
commit 9a1c1fe7ed
15 changed files with 62 additions and 36 deletions
--- a/tests/models/diffllama/test_modeling_diffllama.py
+++ b/tests/models/diffllama/test_modeling_diffllama.py
@@ -25,6 +25,7 @@ from parameterized import parameterized
 from transformers import AutoTokenizer, DiffLlamaConfig, StaticCache, is_torch_available, set_seed
 from transformers.testing_utils import (
    backend_empty_cache,
+    cleanup,
    require_bitsandbytes,
    require_flash_attn,
    require_read_token,
@@ -685,6 +686,10 @@ class DiffLlamaIntegrationTest(unittest.TestCase):
            # 8 is for A100 / A10 and 7 for T4
            cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]

+    def tearDown(self):
+        # Temporary: see LlamaIntegrationTest.tearDown() for the rationale. Remove once that tearDown() is removed.
+        cleanup(torch_device, gc_collect=False)
+
    @slow
    @require_torch_accelerator
    @require_read_token
@@ -884,7 +889,7 @@ class Mask4DTestHard(unittest.TestCase):
        max_cache_len = 16  # note that max_cache_len is greater than the attention_mask.shape[-1]
        past_key_values = StaticCache(
            config=self.model.config,
-            batch_size=1,
+            max_batch_size=1,
            max_cache_len=max_cache_len,
            device=torch_device,
            dtype=self.model.dtype,
@@ -932,7 +937,7 @@ class Mask4DTestHard(unittest.TestCase):
        max_cache_len = 16  # note that max_cache_len is greater than the attention_mask.shape[-1]
        past_key_values = StaticCache(
            config=self.model.config,
-            batch_size=1,
+            max_batch_size=1,
            max_cache_len=max_cache_len,
            device=torch_device,
            dtype=self.model.dtype,