[CI] green llama tests (#37244)
* green llama tests * use cleanup instead * better test comment; cleanup upgrade * better test comment; cleanup upgrade
This commit is contained in:
@@ -118,7 +118,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
|
||||
with torch.no_grad():
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
batch_size=batch_size,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + num_tokens_to_generate,
|
||||
@@ -144,7 +144,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
batch_size=batch_size,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + num_tokens_to_generate,
|
||||
@@ -187,7 +187,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
|
||||
# TODO use decode_one_token(model, input_id.clone(), cache_position) for verification
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
batch_size=batch_size,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + num_tokens_to_generate + 10,
|
||||
@@ -254,7 +254,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
batch_size=batch_size,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
@@ -271,7 +271,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
batch_size=batch_size,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
@@ -287,7 +287,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
batch_size=batch_size,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
@@ -303,7 +303,7 @@ def run_benchmark(logger: Logger, branch: str, commit_id: str, commit_msg: str,
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
batch_size=batch_size,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
|
||||
Reference in New Issue
Block a user