[CI] green llama tests (#37244)

* green llama tests

* use cleanup instead

* better test comment; cleanup upgrade

* better test comment; cleanup upgrade
This commit is contained in:
Joao Gante
2025-04-03 14:15:53 +01:00
committed by GitHub
parent 782d7d945d
commit 9a1c1fe7ed
15 changed files with 62 additions and 36 deletions

View File

@@ -25,6 +25,7 @@ from parameterized import parameterized
from transformers import AutoTokenizer, DiffLlamaConfig, StaticCache, is_torch_available, set_seed
from transformers.testing_utils import (
backend_empty_cache,
cleanup,
require_bitsandbytes,
require_flash_attn,
require_read_token,
@@ -685,6 +686,10 @@ class DiffLlamaIntegrationTest(unittest.TestCase):
# 8 is for A100 / A10 and 7 for T4
cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]
def tearDown(self):
# unittest per-test teardown hook for DiffLlamaIntegrationTest: calls `cleanup` for `torch_device`
# with `gc_collect=False` once each test method has finished.
# NOTE(review): presumably this frees accelerator memory between tests — confirm against `cleanup` in
# transformers.testing_utils.
# See LlamaIntegrationTest.tearDown(). Can be removed once LlamaIntegrationTest.tearDown() is removed.
cleanup(torch_device, gc_collect=False)
@slow
@require_torch_accelerator
@require_read_token
@@ -884,7 +889,7 @@ class Mask4DTestHard(unittest.TestCase):
max_cache_len = 16 # note that max_cache_len is greater than the attention_mask.shape[-1]
past_key_values = StaticCache(
config=self.model.config,
batch_size=1,
max_batch_size=1,
max_cache_len=max_cache_len,
device=torch_device,
dtype=self.model.dtype,
@@ -932,7 +937,7 @@ class Mask4DTestHard(unittest.TestCase):
max_cache_len = 16 # note that max_cache_len is greater than the attention_mask.shape[-1]
past_key_values = StaticCache(
config=self.model.config,
batch_size=1,
max_batch_size=1,
max_cache_len=max_cache_len,
device=torch_device,
dtype=self.model.dtype,