feat: run benchmarks on A100 (#34287)
This commit is contained in:
9
.github/workflows/benchmark.yml
vendored
9
.github/workflows/benchmark.yml
vendored
@@ -16,8 +16,11 @@ env:
|
|||||||
jobs:
|
jobs:
|
||||||
benchmark:
|
benchmark:
|
||||||
name: Benchmark
|
name: Benchmark
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus]
|
||||||
runs-on:
|
runs-on:
|
||||||
group: aws-g5-4xlarge-cache
|
group: ${{ matrix.group }}
|
||||||
if: |
|
if: |
|
||||||
(github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark') )||
|
(github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark') )||
|
||||||
(github.event_name == 'push' && github.ref == 'refs/heads/main')
|
(github.event_name == 'push' && github.ref == 'refs/heads/main')
|
||||||
@@ -60,9 +63,13 @@ jobs:
|
|||||||
commit_id=$GITHUB_SHA
|
commit_id=$GITHUB_SHA
|
||||||
fi
|
fi
|
||||||
commit_msg=$(git show -s --format=%s | cut -c1-70)
|
commit_msg=$(git show -s --format=%s | cut -c1-70)
|
||||||
|
df -h
|
||||||
python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
|
python3 benchmark/llama.py "${{ github.head_ref || github.ref_name }}" "$commit_id" "$commit_msg"
|
||||||
env:
|
env:
|
||||||
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
|
||||||
|
# Enable this to see debug logs
|
||||||
|
# HF_HUB_VERBOSITY: debug
|
||||||
|
# TRANSFORMERS_VERBOSITY: debug
|
||||||
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
|
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
|
||||||
PGUSER: transformers_benchmarks
|
PGUSER: transformers_benchmarks
|
||||||
PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
|
PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -96,17 +96,21 @@ def run_benchmark(branch: str, commit_id: str, commit_msg: str, num_tokens_to_ge
|
|||||||
)
|
)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
benchmark_id = cur.fetchone()[0]
|
benchmark_id = cur.fetchone()[0]
|
||||||
|
logger.info(f"running benchmark #{benchmark_id} on {gpu_name}")
|
||||||
metrics_thread = Thread(target=collect_metrics, args=[benchmark_id, continue_metric_collection])
|
metrics_thread = Thread(target=collect_metrics, args=[benchmark_id, continue_metric_collection])
|
||||||
metrics_thread.start()
|
metrics_thread.start()
|
||||||
|
logger.info("started background thread to fetch device metrics")
|
||||||
|
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling
|
os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling
|
||||||
|
|
||||||
device = "cuda"
|
device = "cuda"
|
||||||
ckpt = "meta-llama/Llama-2-7b-hf"
|
ckpt = "meta-llama/Llama-2-7b-hf"
|
||||||
|
|
||||||
|
logger.info("downloading weights")
|
||||||
# This is to avoid counting download in model load time measurement
|
# This is to avoid counting download in model load time measurement
|
||||||
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16)
|
model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16)
|
||||||
gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
|
gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
|
||||||
|
logger.info("loading model")
|
||||||
start = perf_counter()
|
start = perf_counter()
|
||||||
model = AutoModelForCausalLM.from_pretrained(
|
model = AutoModelForCausalLM.from_pretrained(
|
||||||
ckpt, torch_dtype=torch.float16, generation_config=gen_config
|
ckpt, torch_dtype=torch.float16, generation_config=gen_config
|
||||||
|
|||||||
Reference in New Issue
Block a user