[CI] green llama tests (#37244)

* green llama tests
* use cleanup instead
* better test comment; cleanup upgrade
* better test comment; cleanup upgrade
2025-04-03 14:15:53 +01:00
parent 782d7d945d
commit 9a1c1fe7ed
15 changed files with 62 additions and 36 deletions
--- a/docs/source/en/llm_optims.md
+++ b/docs/source/en/llm_optims.md
@@ -93,7 +93,7 @@ model.generation_config.max_new_tokens = 16

 past_key_values = StaticCache(
    config=model.config,
-    batch_size=1,
+    max_batch_size=1,
    # If you plan to reuse the cache, make sure the cache length is large enough for all cases
    max_cache_len=prompt_length+(model.generation_config.max_new_tokens*2),
    device=model.device,
@@ -159,7 +159,7 @@ from torch.nn.attention import SDPBackend, sdpa_kernel
 batch_size, seq_length = inputs["input_ids"].shape
 with torch.no_grad():
    past_key_values = StaticCache(
-        config=model.config, batch_size=2, max_cache_len=4096, device=torch_device, dtype=model.dtype
+        config=model.config, max_batch_size=2, max_cache_len=4096, device=torch_device, dtype=model.dtype
    )
    cache_position = torch.arange(seq_length, device=torch_device)
    generated_ids = torch.zeros(