[CI] green llama tests (#37244)

* green llama tests
* use cleanup instead
* better test comment; cleanup upgrade
* better test comment; cleanup upgrade
2025-04-03 14:15:53 +01:00
parent 782d7d945d
commit 9a1c1fe7ed
15 changed files with 62 additions and 36 deletions
--- a/benchmark/llama.py
+++ b/benchmark/llama.py
@@ -118,7 +118,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
        with torch.no_grad():
            past_key_values = StaticCache(
                model.config,
-                batch_size=batch_size,
+                max_batch_size=batch_size,
                device=device,
                dtype=torch.float16,
                max_cache_len=seq_length + num_tokens_to_generate,
@@ -144,7 +144,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,

            past_key_values = StaticCache(
                model.config,
-                batch_size=batch_size,
+                max_batch_size=batch_size,
                device=device,
                dtype=torch.float16,
                max_cache_len=seq_length + num_tokens_to_generate,
@@ -187,7 +187,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
            # TODO use  decode_one_token(model, input_id.clone(), cache_position) for verification
            past_key_values = StaticCache(
                model.config,
-                batch_size=batch_size,
+                max_batch_size=batch_size,
                device=device,
                dtype=torch.float16,
                max_cache_len=seq_length + num_tokens_to_generate + 10,
@@ -254,7 +254,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,

            past_key_values = StaticCache(
                model.config,
-                batch_size=batch_size,
+                max_batch_size=batch_size,
                device=device,
                dtype=torch.float16,
                max_cache_len=seq_length + 128,
@@ -271,7 +271,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,

            past_key_values = StaticCache(
                model.config,
-                batch_size=batch_size,
+                max_batch_size=batch_size,
                device=device,
                dtype=torch.float16,
                max_cache_len=seq_length + 128,
@@ -287,7 +287,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,

            past_key_values = StaticCache(
                model.config,
-                batch_size=batch_size,
+                max_batch_size=batch_size,
                device=device,
                dtype=torch.float16,
                max_cache_len=seq_length + 128,
@@ -303,7 +303,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,

            past_key_values = StaticCache(
                model.config,
-                batch_size=batch_size,
+                max_batch_size=batch_size,
                device=device,
                dtype=torch.float16,
                max_cache_len=seq_length + 128,