Compare commits
27 Commits
ko-deepsee
...
v4.55.4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d79b2d981f | ||
|
|
90792b730a | ||
|
|
a03df6acd4 | ||
|
|
170b2708cb | ||
|
|
7dbc054e2a | ||
|
|
c097a43898 | ||
|
|
663cbb0d04 | ||
|
|
c7bd5350f0 | ||
|
|
e75d67ec39 | ||
|
|
d7f67d2006 | ||
|
|
acf295aec3 | ||
|
|
aaa3169aa2 | ||
|
|
ea2eee0bc8 | ||
|
|
956be23fff | ||
|
|
79a9ffc520 | ||
|
|
99404c7098 | ||
|
|
0d6908038c | ||
|
|
b8e97fbfd2 | ||
|
|
586b6e693b | ||
|
|
95ae07d11f | ||
|
|
0d9032ae71 | ||
|
|
1d42803aac | ||
|
|
382717e543 | ||
|
|
cc98f42d22 | ||
|
|
d2f7266367 | ||
|
|
daab2db33f | ||
|
|
06f8004e5c |
BIN
._.DS_Store
BIN
._.DS_Store
Binary file not shown.
BIN
._.circleci
BIN
._.circleci
Binary file not shown.
BIN
._.gitattributes
BIN
._.gitattributes
Binary file not shown.
BIN
._.gitignore
BIN
._.gitignore
Binary file not shown.
BIN
._AGENTS.md
BIN
._AGENTS.md
Binary file not shown.
BIN
._CITATION.cff
BIN
._CITATION.cff
Binary file not shown.
Binary file not shown.
BIN
._ISSUES.md
BIN
._ISSUES.md
Binary file not shown.
Binary file not shown.
BIN
._benchmark
BIN
._benchmark
Binary file not shown.
BIN
._examples
BIN
._examples
Binary file not shown.
BIN
._notebooks
BIN
._notebooks
Binary file not shown.
BIN
._templates
BIN
._templates
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -109,9 +109,7 @@ class CircleCIJob:
|
||||
self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
|
||||
print(f"Using {self.docker_image} docker image")
|
||||
if self.install_steps is None:
|
||||
self.install_steps = ["uv pip install ."]
|
||||
# Use a custom patched pytest to force exit the process at the end, to avoid `Too long with no output (exceeded 10m0s): context deadline exceeded`
|
||||
self.install_steps.append("uv pip install git+https://github.com/ydshieh/pytest.git@8.4.1-ydshieh")
|
||||
self.install_steps = ["uv venv && uv pip install ."]
|
||||
if self.pytest_options is None:
|
||||
self.pytest_options = {}
|
||||
if isinstance(self.tests_to_run, str):
|
||||
@@ -215,7 +213,7 @@ generate_job = CircleCIJob(
|
||||
docker_image=[{"image": "huggingface/transformers-torch-light"}],
|
||||
# networkx==3.3 (after #36957) cause some issues
|
||||
# TODO: remove this once it works directly
|
||||
install_steps=["uv pip install ."],
|
||||
install_steps=["uv venv && uv pip install ."],
|
||||
marker="generate",
|
||||
parallelism=6,
|
||||
)
|
||||
@@ -252,7 +250,7 @@ examples_torch_job = CircleCIJob(
|
||||
additional_env={"OMP_NUM_THREADS": 8},
|
||||
docker_image=[{"image":"huggingface/transformers-examples-torch"}],
|
||||
# TODO @ArthurZucker remove this once docker is easier to build
|
||||
install_steps=["uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
|
||||
install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
|
||||
pytest_num_workers=4,
|
||||
)
|
||||
|
||||
@@ -261,7 +259,7 @@ hub_job = CircleCIJob(
|
||||
additional_env={"HUGGINGFACE_CO_STAGING": True},
|
||||
docker_image=[{"image":"huggingface/transformers-torch-light"}],
|
||||
install_steps=[
|
||||
'uv pip install .',
|
||||
'uv venv && uv pip install .',
|
||||
'git config --global user.email "ci@dummy.com"',
|
||||
'git config --global user.name "ci"',
|
||||
],
|
||||
@@ -275,6 +273,7 @@ onnx_job = CircleCIJob(
|
||||
"onnx",
|
||||
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
|
||||
install_steps=[
|
||||
"uv venv",
|
||||
"uv pip install .[testing,sentencepiece,onnxruntime,vision,rjieba]",
|
||||
],
|
||||
pytest_options={"k onnx": None},
|
||||
@@ -304,7 +303,7 @@ non_model_job = CircleCIJob(
|
||||
docker_image=[{"image": "huggingface/transformers-torch-light"}],
|
||||
# networkx==3.3 (after #36957) cause some issues
|
||||
# TODO: remove this once it works directly
|
||||
install_steps=["uv pip install .[serving]"],
|
||||
install_steps=["uv venv && uv pip install .[serving]"],
|
||||
marker="not generate",
|
||||
parallelism=6,
|
||||
)
|
||||
@@ -322,7 +321,7 @@ doc_test_job = CircleCIJob(
|
||||
additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
|
||||
install_steps=[
|
||||
# Add an empty file to keep the test step running correctly even no file is selected to be tested.
|
||||
"uv pip install .",
|
||||
"uv venv && pip install .",
|
||||
"touch dummy.py",
|
||||
command,
|
||||
"cat pr_documentation_tests_temp.txt",
|
||||
|
||||
11
.gitattributes
vendored
11
.gitattributes
vendored
@@ -1,13 +1,4 @@
|
||||
*.py eol=lf
|
||||
*.rst eol=lf
|
||||
*.md eol=lf
|
||||
*.mdx eol=lf
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.png filter=lfs diff=lfs merge=lfs -text
|
||||
*.jpg filter=lfs diff=lfs merge=lfs -text
|
||||
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
||||
*.gif filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.mdx eol=lf
|
||||
BIN
.github/._ISSUE_TEMPLATE
vendored
BIN
.github/._ISSUE_TEMPLATE
vendored
Binary file not shown.
BIN
.github/._PULL_REQUEST_TEMPLATE.md
vendored
BIN
.github/._PULL_REQUEST_TEMPLATE.md
vendored
Binary file not shown.
BIN
.github/._conda
vendored
BIN
.github/._conda
vendored
Binary file not shown.
BIN
.github/._scripts
vendored
BIN
.github/._scripts
vendored
Binary file not shown.
BIN
.github/._workflows
vendored
BIN
.github/._workflows
vendored
Binary file not shown.
BIN
.github/ISSUE_TEMPLATE/._bug-report.yml
vendored
BIN
.github/ISSUE_TEMPLATE/._bug-report.yml
vendored
Binary file not shown.
BIN
.github/ISSUE_TEMPLATE/._config.yml
vendored
BIN
.github/ISSUE_TEMPLATE/._config.yml
vendored
Binary file not shown.
BIN
.github/ISSUE_TEMPLATE/._feature-request.yml
vendored
BIN
.github/ISSUE_TEMPLATE/._feature-request.yml
vendored
Binary file not shown.
BIN
.github/ISSUE_TEMPLATE/._i18n.md
vendored
BIN
.github/ISSUE_TEMPLATE/._i18n.md
vendored
Binary file not shown.
BIN
.github/ISSUE_TEMPLATE/._migration.yml
vendored
BIN
.github/ISSUE_TEMPLATE/._migration.yml
vendored
Binary file not shown.
BIN
.github/ISSUE_TEMPLATE/._new-model-addition.yml
vendored
BIN
.github/ISSUE_TEMPLATE/._new-model-addition.yml
vendored
Binary file not shown.
BIN
.github/conda/._build.sh
vendored
BIN
.github/conda/._build.sh
vendored
Binary file not shown.
BIN
.github/conda/._meta.yaml
vendored
BIN
.github/conda/._meta.yaml
vendored
Binary file not shown.
BIN
.github/scripts/._assign_reviewers.py
vendored
BIN
.github/scripts/._assign_reviewers.py
vendored
Binary file not shown.
BIN
.github/scripts/._codeowners_for_review_action
vendored
BIN
.github/scripts/._codeowners_for_review_action
vendored
Binary file not shown.
BIN
.github/workflows/._TROUBLESHOOT.md
vendored
BIN
.github/workflows/._TROUBLESHOOT.md
vendored
Binary file not shown.
BIN
.github/workflows/._add-model-like.yml
vendored
BIN
.github/workflows/._add-model-like.yml
vendored
Binary file not shown.
BIN
.github/workflows/._assign-reviewers.yml
vendored
BIN
.github/workflows/._assign-reviewers.yml
vendored
Binary file not shown.
BIN
.github/workflows/._build-ci-docker-images.yml
vendored
BIN
.github/workflows/._build-ci-docker-images.yml
vendored
Binary file not shown.
BIN
.github/workflows/._build-docker-images.yml
vendored
BIN
.github/workflows/._build-docker-images.yml
vendored
Binary file not shown.
Binary file not shown.
BIN
.github/workflows/._build-past-ci-docker-images.yml
vendored
BIN
.github/workflows/._build-past-ci-docker-images.yml
vendored
Binary file not shown.
BIN
.github/workflows/._check_tiny_models.yml
vendored
BIN
.github/workflows/._check_tiny_models.yml
vendored
Binary file not shown.
BIN
.github/workflows/._get-pr-info.yml
vendored
BIN
.github/workflows/._get-pr-info.yml
vendored
Binary file not shown.
BIN
.github/workflows/._get-pr-number.yml
vendored
BIN
.github/workflows/._get-pr-number.yml
vendored
Binary file not shown.
BIN
.github/workflows/._model_jobs_intel_gaudi.yml
vendored
BIN
.github/workflows/._model_jobs_intel_gaudi.yml
vendored
Binary file not shown.
Binary file not shown.
BIN
.github/workflows/._pr-style-bot.yml
vendored
BIN
.github/workflows/._pr-style-bot.yml
vendored
Binary file not shown.
BIN
.github/workflows/._push-important-models.yml
vendored
BIN
.github/workflows/._push-important-models.yml
vendored
Binary file not shown.
BIN
.github/workflows/._release-conda.yml
vendored
BIN
.github/workflows/._release-conda.yml
vendored
Binary file not shown.
BIN
.github/workflows/._self-nightly-past-ci-caller.yml
vendored
BIN
.github/workflows/._self-nightly-past-ci-caller.yml
vendored
Binary file not shown.
BIN
.github/workflows/._self-past-caller.yml
vendored
BIN
.github/workflows/._self-past-caller.yml
vendored
Binary file not shown.
BIN
.github/workflows/._self-push-amd-mi210-caller.yml
vendored
BIN
.github/workflows/._self-push-amd-mi210-caller.yml
vendored
Binary file not shown.
BIN
.github/workflows/._self-push-amd-mi250-caller.yml
vendored
BIN
.github/workflows/._self-push-amd-mi250-caller.yml
vendored
Binary file not shown.
BIN
.github/workflows/._self-push-amd.yml
vendored
BIN
.github/workflows/._self-push-amd.yml
vendored
Binary file not shown.
BIN
.github/workflows/._self-push-caller.yml
vendored
BIN
.github/workflows/._self-push-caller.yml
vendored
Binary file not shown.
BIN
.github/workflows/._self-scheduled-amd-caller.yml
vendored
BIN
.github/workflows/._self-scheduled-amd-caller.yml
vendored
Binary file not shown.
Binary file not shown.
BIN
.github/workflows/._self-scheduled-intel-gaudi.yml
vendored
BIN
.github/workflows/._self-scheduled-intel-gaudi.yml
vendored
Binary file not shown.
Binary file not shown.
BIN
.github/workflows/._ssh-runner.yml
vendored
BIN
.github/workflows/._ssh-runner.yml
vendored
Binary file not shown.
BIN
.github/workflows/._stale.yml
vendored
BIN
.github/workflows/._stale.yml
vendored
Binary file not shown.
BIN
.github/workflows/._trufflehog.yml
vendored
BIN
.github/workflows/._trufflehog.yml
vendored
Binary file not shown.
BIN
.github/workflows/._update_metdata.yml
vendored
BIN
.github/workflows/._update_metdata.yml
vendored
Binary file not shown.
BIN
.github/workflows/._upload_pr_documentation.yml
vendored
BIN
.github/workflows/._upload_pr_documentation.yml
vendored
Binary file not shown.
2
.github/workflows/benchmark.yml
vendored
2
.github/workflows/benchmark.yml
vendored
@@ -48,7 +48,7 @@ jobs:
|
||||
|
||||
- name: Run database init script
|
||||
run: |
|
||||
psql -f benchmark/utils/init_db.sql
|
||||
psql -f benchmark/init_db.sql
|
||||
env:
|
||||
PGDATABASE: metrics
|
||||
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
|
||||
|
||||
5
.github/workflows/check_failed_tests.yml
vendored
5
.github/workflows/check_failed_tests.yml
vendored
@@ -21,9 +21,6 @@ on:
|
||||
report_repo_id:
|
||||
required: true
|
||||
type: string
|
||||
commit_sha:
|
||||
required: false
|
||||
type: string
|
||||
|
||||
|
||||
env:
|
||||
@@ -90,7 +87,7 @@ jobs:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Get target commit
|
||||
working-directory: /transformers/utils
|
||||
|
||||
49
.github/workflows/collated-reports.yml
vendored
49
.github/workflows/collated-reports.yml
vendored
@@ -1,49 +0,0 @@
|
||||
name: CI collated reports
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
job:
|
||||
required: true
|
||||
type: string
|
||||
report_repo_id:
|
||||
required: true
|
||||
type: string
|
||||
machine_type:
|
||||
required: true
|
||||
type: string
|
||||
gpu_name:
|
||||
description: Name of the GPU used for the job. Its enough that the value contains the name of the GPU, e.g. "noise-h100-more-noise". Case insensitive.
|
||||
required: true
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
collated_reports:
|
||||
name: Collated reports
|
||||
runs-on: ubuntu-22.04
|
||||
if: always()
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/download-artifact@v4
|
||||
|
||||
- name: Collated reports
|
||||
shell: bash
|
||||
env:
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_SHA: ${{ github.sha }}
|
||||
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
|
||||
run: |
|
||||
pip install huggingface_hub
|
||||
python3 utils/collated_reports.py \
|
||||
--path . \
|
||||
--machine-type ${{ inputs.machine_type }} \
|
||||
--commit-hash ${{ env.CI_SHA }} \
|
||||
--job ${{ inputs.job }} \
|
||||
--report-repo-id ${{ inputs.report_repo_id }} \
|
||||
--gpu-name ${{ inputs.gpu_name }}
|
||||
|
||||
- name: Upload collated reports
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: collated_reports_${{ env.CI_SHA }}.json
|
||||
path: collated_reports_${{ env.CI_SHA }}.json
|
||||
5
.github/workflows/model_jobs.yml
vendored
5
.github/workflows/model_jobs.yml
vendored
@@ -18,9 +18,6 @@ on:
|
||||
docker:
|
||||
required: true
|
||||
type: string
|
||||
commit_sha:
|
||||
required: false
|
||||
type: string
|
||||
report_name_prefix:
|
||||
required: false
|
||||
default: run_models_gpu
|
||||
@@ -73,7 +70,7 @@ jobs:
|
||||
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
|
||||
59
.github/workflows/self-nightly-caller.yml
vendored
59
.github/workflows/self-nightly-caller.yml
vendored
@@ -1,54 +1,43 @@
|
||||
name: Nvidia CI with nightly torch
|
||||
name: Self-hosted runner (nightly-ci)
|
||||
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
# triggered when the daily scheduled Nvidia CI is completed.
|
||||
# This way, we can compare the results more easily.
|
||||
workflow_run:
|
||||
workflows: ["Nvidia CI"]
|
||||
branches: ["main"]
|
||||
types: [completed]
|
||||
schedule:
|
||||
- cron: "17 2 * * *"
|
||||
push:
|
||||
branches:
|
||||
- run_ci_with_nightly_torch*
|
||||
|
||||
# Used for `push` to easily modify the target workflow runs to compare against
|
||||
env:
|
||||
prev_workflow_run_id: ""
|
||||
other_workflow_run_id: ""
|
||||
|
||||
- run_nightly_ci*
|
||||
|
||||
jobs:
|
||||
build_nightly_torch_ci_images:
|
||||
name: Build CI Docker Images with nightly torch
|
||||
build_nightly_ci_images:
|
||||
name: Build Nightly CI Docker Images
|
||||
if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci'))
|
||||
uses: ./.github/workflows/build-nightly-ci-docker-images.yml
|
||||
secrets: inherit
|
||||
|
||||
setup:
|
||||
name: Setup
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Setup
|
||||
run: |
|
||||
mkdir "setup_values"
|
||||
echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
|
||||
echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: setup_values
|
||||
path: setup_values
|
||||
|
||||
model-ci:
|
||||
name: Model CI
|
||||
needs: build_nightly_torch_ci_images
|
||||
needs: [build_nightly_ci_images]
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#transformers-ci-past-future"
|
||||
runner: ci
|
||||
docker: huggingface/transformers-all-latest-torch-nightly-gpu
|
||||
ci_event: Nightly CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci_with_torch_nightly
|
||||
commit_sha: ${{ github.event.workflow_run.head_sha || github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
name: DeepSpeed CI
|
||||
needs: [build_nightly_ci_images]
|
||||
uses: ./.github/workflows/self-scheduled.yml
|
||||
with:
|
||||
job: run_torch_cuda_extensions_gpu
|
||||
slack_report_channel: "#transformers-ci-past-future"
|
||||
runner: ci
|
||||
# test deepspeed nightly build with the latest release torch
|
||||
docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
||||
ci_event: Nightly CI
|
||||
working-directory-prefix: /workspace
|
||||
secrets: inherit
|
||||
|
||||
25
.github/workflows/self-push-amd-mi300-caller.yml
vendored
Normal file
25
.github/workflows/self-push-amd-mi300-caller.yml
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
name: Self-hosted runner (AMD mi300 CI caller)
|
||||
|
||||
on:
|
||||
#workflow_run:
|
||||
# workflows: ["Self-hosted runner (push-caller)"]
|
||||
# branches: ["main"]
|
||||
# types: [completed]
|
||||
push:
|
||||
branches:
|
||||
- run_amd_push_ci_caller*
|
||||
paths:
|
||||
- "src/**"
|
||||
- "tests/**"
|
||||
- ".github/**"
|
||||
- "templates/**"
|
||||
- "utils/**"
|
||||
|
||||
jobs:
|
||||
run_amd_ci:
|
||||
name: AMD mi300
|
||||
if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci'))))
|
||||
uses: ./.github/workflows/self-push-amd.yml
|
||||
with:
|
||||
gpu_flavor: mi300
|
||||
secrets: inherit
|
||||
@@ -1,8 +1,8 @@
|
||||
name: Self-hosted runner scale set (AMD mi355 scheduled CI caller)
|
||||
name: Self-hosted runner scale set (AMD mi300 scheduled CI caller)
|
||||
|
||||
# Note: For every job in this workflow, the name of the runner scale set is finalized in the runner yaml i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml
|
||||
# For example, 1gpu : amd-mi355-ci-1gpu
|
||||
# 2gpu : amd-mi355-ci-2gpu
|
||||
# For example, 1gpu scale set: amd-mi300-ci-1gpu
|
||||
# 2gpu scale set: amd-mi300-ci-2gpu
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
@@ -20,9 +20,9 @@ jobs:
|
||||
with:
|
||||
job: run_models_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi355-ci
|
||||
runner_scale_set: amd-mi300-ci
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi355
|
||||
ci_event: Scheduled CI (AMD) - mi300
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
@@ -32,9 +32,9 @@ jobs:
|
||||
with:
|
||||
job: run_pipelines_torch_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi355-ci
|
||||
runner_scale_set: amd-mi300-ci
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi355
|
||||
ci_event: Scheduled CI (AMD) - mi300
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
@@ -44,9 +44,9 @@ jobs:
|
||||
with:
|
||||
job: run_examples_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi355-ci
|
||||
runner_scale_set: amd-mi300-ci
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi355
|
||||
ci_event: Scheduled CI (AMD) - mi300
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
@@ -56,8 +56,8 @@ jobs:
|
||||
with:
|
||||
job: run_torch_cuda_extensions_gpu
|
||||
slack_report_channel: "#amd-hf-ci"
|
||||
runner_scale_set: amd-mi355-ci
|
||||
runner_scale_set: amd-mi300-ci
|
||||
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi355
|
||||
ci_event: Scheduled CI (AMD) - mi300
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
@@ -24,7 +24,6 @@ jobs:
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi325
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
env_file: /etc/podinfo/gha-gpu-isolation-settings
|
||||
secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
@@ -37,7 +36,6 @@ jobs:
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi325
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
env_file: /etc/podinfo/gha-gpu-isolation-settings
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
@@ -50,7 +48,6 @@ jobs:
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi325
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
env_file: /etc/podinfo/gha-gpu-isolation-settings
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
@@ -63,5 +60,4 @@ jobs:
|
||||
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi325
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
env_file: /etc/podinfo/gha-gpu-isolation-settings
|
||||
secrets: inherit
|
||||
|
||||
11
.github/workflows/self-scheduled-caller.yml
vendored
11
.github/workflows/self-scheduled-caller.yml
vendored
@@ -1,4 +1,5 @@
|
||||
name: Nvidia CI
|
||||
name: Self-hosted runner (scheduled)
|
||||
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
@@ -6,7 +7,7 @@ on:
|
||||
- cron: "17 2 * * *"
|
||||
push:
|
||||
branches:
|
||||
- run_nvidia_ci*
|
||||
- run_scheduled_ci*
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
prev_workflow_run_id:
|
||||
@@ -53,7 +54,6 @@ jobs:
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
commit_sha: ${{ github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
@@ -65,7 +65,6 @@ jobs:
|
||||
docker: huggingface/transformers-pytorch-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
commit_sha: ${{ github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
@@ -77,7 +76,6 @@ jobs:
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
commit_sha: ${{ github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
trainer-fsdp-ci:
|
||||
@@ -89,7 +87,6 @@ jobs:
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
commit_sha: ${{ github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
@@ -102,7 +99,6 @@ jobs:
|
||||
ci_event: Daily CI
|
||||
working-directory-prefix: /workspace
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
commit_sha: ${{ github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
quantization-ci:
|
||||
@@ -114,5 +110,4 @@ jobs:
|
||||
docker: huggingface/transformers-quantization-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
commit_sha: ${{ github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
21
.github/workflows/self-scheduled.yml
vendored
21
.github/workflows/self-scheduled.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Nvidia CI (job definitions)
|
||||
name: Self-hosted runner (scheduled)
|
||||
|
||||
# Note that each job's dependencies go into a corresponding docker file.
|
||||
#
|
||||
@@ -28,9 +28,6 @@ on:
|
||||
report_repo_id:
|
||||
required: true
|
||||
type: string
|
||||
commit_sha:
|
||||
required: false
|
||||
type: string
|
||||
|
||||
|
||||
env:
|
||||
@@ -49,8 +46,8 @@ env:
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
name: Setup
|
||||
if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu", "run_quantization_torch_gpu"]'), inputs.job)
|
||||
name: Setup
|
||||
strategy:
|
||||
matrix:
|
||||
machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
|
||||
@@ -122,7 +119,6 @@ jobs:
|
||||
slice_id: ${{ matrix.slice_id }}
|
||||
runner_map: ${{ needs.setup.outputs.runner_map }}
|
||||
docker: ${{ inputs.docker }}
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
secrets: inherit
|
||||
|
||||
run_trainer_and_fsdp_gpu:
|
||||
@@ -141,7 +137,6 @@ jobs:
|
||||
slice_id: ${{ matrix.slice_id }}
|
||||
runner_map: ${{ needs.setup.outputs.runner_map }}
|
||||
docker: ${{ inputs.docker }}
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
report_name_prefix: run_trainer_and_fsdp_gpu
|
||||
secrets: inherit
|
||||
|
||||
@@ -160,7 +155,7 @@ jobs:
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
@@ -228,7 +223,7 @@ jobs:
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
@@ -297,7 +292,7 @@ jobs:
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: ${{ inputs.working-directory-prefix }}/transformers
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: ${{ inputs.working-directory-prefix }}/transformers
|
||||
@@ -405,7 +400,7 @@ jobs:
|
||||
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
|
||||
working-directory: /transformers
|
||||
@@ -469,7 +464,6 @@ jobs:
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
ref: ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- name: Install transformers
|
||||
run: pip install transformers
|
||||
@@ -524,7 +518,6 @@ jobs:
|
||||
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
|
||||
ci_event: ${{ inputs.ci_event }}
|
||||
report_repo_id: ${{ inputs.report_repo_id }}
|
||||
commit_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
secrets: inherit
|
||||
|
||||
@@ -535,7 +528,7 @@ jobs:
|
||||
uses: ./.github/workflows/check_failed_tests.yml
|
||||
with:
|
||||
docker: ${{ inputs.docker }}
|
||||
start_sha: ${{ inputs.commit_sha || github.sha }}
|
||||
start_sha: ${{ github.sha }}
|
||||
job: ${{ inputs.job }}
|
||||
slack_report_channel: ${{ inputs.slack_report_channel }}
|
||||
ci_event: ${{ inputs.ci_event }}
|
||||
|
||||
12
.github/workflows/slack-report.yml
vendored
12
.github/workflows/slack-report.yml
vendored
@@ -24,10 +24,6 @@ on:
|
||||
report_repo_id:
|
||||
required: true
|
||||
type: string
|
||||
commit_sha:
|
||||
required: false
|
||||
type: string
|
||||
|
||||
|
||||
env:
|
||||
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
|
||||
@@ -36,7 +32,7 @@ jobs:
|
||||
send_results:
|
||||
name: Send results to webhook
|
||||
runs-on: ubuntu-22.04
|
||||
if: always() && !cancelled()
|
||||
if: always()
|
||||
steps:
|
||||
- name: Preliminary job status
|
||||
shell: bash
|
||||
@@ -45,10 +41,6 @@ jobs:
|
||||
echo "Setup status: ${{ inputs.setup_status }}"
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 2
|
||||
ref: ${{ inputs.commit_sha || github.sha }}
|
||||
|
||||
- uses: actions/download-artifact@v4
|
||||
|
||||
- name: Prepare some setup values
|
||||
@@ -75,7 +67,7 @@ jobs:
|
||||
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_EVENT: ${{ inputs.ci_event }}
|
||||
CI_SHA: ${{ inputs.commit_sha || github.sha }}
|
||||
CI_SHA: ${{ github.sha }}
|
||||
CI_TEST_JOB: ${{ inputs.job }}
|
||||
SETUP_STATUS: ${{ inputs.setup_status }}
|
||||
REPORT_REPO_ID: ${{ inputs.report_repo_id }}
|
||||
|
||||
@@ -68,7 +68,8 @@ already reported** (use the search bar on GitHub under Issues). Your issue shoul
|
||||
|
||||
Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it:
|
||||
|
||||
* Your **OS type and version** and **Python**, and **PyTorch** versions when applicable.
|
||||
* Your **OS type and version** and **Python**, **PyTorch** and
|
||||
**TensorFlow** versions when applicable.
|
||||
* A short, self-contained, code snippet that allows us to reproduce the bug in
|
||||
less than 30s.
|
||||
* The *full* traceback if an exception is raised.
|
||||
@@ -164,7 +165,8 @@ You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main
|
||||
mode with the `-e` flag.
|
||||
|
||||
Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
|
||||
failure with this command. If that's the case make sure to install Pytorch then do:
|
||||
failure with this command. If that's the case make sure to install the Deep Learning framework you are working with
|
||||
(PyTorch, TensorFlow and/or Flax) then do:
|
||||
|
||||
```bash
|
||||
pip install -e ".[quality]"
|
||||
|
||||
1
Makefile
1
Makefile
@@ -52,7 +52,6 @@ repo-consistency:
|
||||
python utils/check_doctest_list.py
|
||||
python utils/update_metadata.py --check-only
|
||||
python utils/check_docstrings.py
|
||||
python utils/add_dates.py
|
||||
|
||||
# this target runs checks on all files
|
||||
|
||||
|
||||
@@ -147,7 +147,7 @@ chat = [
|
||||
{"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
|
||||
]
|
||||
|
||||
pipeline = pipeline(task="text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", dtype=torch.bfloat16, device_map="auto")
|
||||
pipeline = pipeline(task="text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16, device_map="auto")
|
||||
response = pipeline(chat, max_new_tokens=512)
|
||||
print(response[0]["generated_text"][-1]["content"])
|
||||
```
|
||||
|
||||
@@ -14,7 +14,7 @@ Models uploaded on the Hugging Face Hub come in different formats. We heavily re
|
||||
models in the [`safetensors`](https://github.com/huggingface/safetensors) format (which is the default prioritized
|
||||
by the transformers library), as developed specifically to prevent arbitrary code execution on your system.
|
||||
|
||||
To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.
|
||||
To avoid loading models from unsafe formats(e.g. [pickle](https://docs.python.org/3/library/pickle.html), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.
|
||||
|
||||
### Remote code
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1
benchmark/.gitignore
vendored
1
benchmark/.gitignore
vendored
@@ -1 +0,0 @@
|
||||
benchmark_results/
|
||||
@@ -1,345 +0,0 @@
|
||||
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from logging import Logger
|
||||
import os
|
||||
from threading import Event, Thread
|
||||
from time import perf_counter, sleep
|
||||
from typing import Optional
|
||||
import sys
|
||||
|
||||
# Add the parent directory to Python path to import benchmarks_entrypoint
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from benchmarks_entrypoint import MetricsRecorder
|
||||
|
||||
import gpustat
|
||||
import psutil
|
||||
import psycopg2
|
||||
|
||||
# Optional heavy ML dependencies - only required when actually running the benchmark
|
||||
try:
|
||||
import torch
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
|
||||
TRANSFORMERS_AVAILABLE = True
|
||||
except ImportError:
|
||||
TRANSFORMERS_AVAILABLE = False
|
||||
torch = None
|
||||
AutoModelForCausalLM = None
|
||||
AutoTokenizer = None
|
||||
GenerationConfig = None
|
||||
StaticCache = None
|
||||
|
||||
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "1"
|
||||
|
||||
# Only set torch precision if torch is available
|
||||
if TRANSFORMERS_AVAILABLE:
|
||||
torch.set_float32_matmul_precision("high")
|
||||
|
||||
|
||||
def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
|
||||
p = psutil.Process(os.getpid())
|
||||
while not continue_metric_collection.is_set():
|
||||
with p.oneshot():
|
||||
cpu_util = p.cpu_percent()
|
||||
mem_megabytes = p.memory_info().rss / (1024 * 1024)
|
||||
gpu_stats = gpustat.GPUStatCollection.new_query()
|
||||
gpu_util = gpu_stats[0]["utilization.gpu"]
|
||||
gpu_mem_megabytes = gpu_stats[0]["memory.used"]
|
||||
metrics_recorder.collect_device_measurements(
|
||||
benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
|
||||
)
|
||||
sleep(0.01)
|
||||
|
||||
|
||||
def run_benchmark(
|
||||
logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, metrics_recorder=None, num_tokens_to_generate=100
|
||||
):
|
||||
# Check if required ML dependencies are available
|
||||
if not TRANSFORMERS_AVAILABLE:
|
||||
logger.error("Transformers and torch are required to run the LLaMA benchmark. Please install them with:")
|
||||
logger.error("pip install torch transformers")
|
||||
logger.error("Skipping LLaMA benchmark due to missing dependencies.")
|
||||
return
|
||||
|
||||
continue_metric_collection = Event()
|
||||
metrics_thread = None
|
||||
model_id = "meta-llama/Llama-2-7b-hf"
|
||||
|
||||
# If no metrics_recorder is provided, create one for backward compatibility
|
||||
if metrics_recorder is None:
|
||||
try:
|
||||
metrics_recorder = MetricsRecorder(
|
||||
psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg, True
|
||||
)
|
||||
should_close_recorder = True
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create metrics recorder: {e}")
|
||||
return
|
||||
else:
|
||||
should_close_recorder = False
|
||||
try:
|
||||
gpu_stats = gpustat.GPUStatCollection.new_query()
|
||||
gpu_name = gpu_stats[0]["name"]
|
||||
benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
|
||||
logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
|
||||
metrics_thread = Thread(
|
||||
target=collect_metrics,
|
||||
args=[benchmark_id, continue_metric_collection, metrics_recorder],
|
||||
)
|
||||
metrics_thread.start()
|
||||
logger.info("started background thread to fetch device metrics")
|
||||
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false" # silence warnings when compiling
|
||||
|
||||
device = "cuda"
|
||||
|
||||
logger.info("downloading weights")
|
||||
# This is to avoid counting download in model load time measurement
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16)
|
||||
gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
|
||||
logger.info("loading model")
|
||||
start = perf_counter()
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_id, dtype=torch.float16, generation_config=gen_config
|
||||
).eval()
|
||||
model.to(device)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
model_load_time = end - start
|
||||
logger.info(f"loaded model in: {model_load_time}s")
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
prompt = "Why dogs are so cute?"
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to(device)
|
||||
|
||||
# Specify the max length (including both the prompt and the response)
|
||||
# When calling `generate` with `cache_implementation="static" later, this is also used to create a `StaticCache` object
|
||||
# with sequence length = `max_length`. The longer the more you will re-use it
|
||||
seq_length = inputs["input_ids"].shape[1]
|
||||
model.generation_config.max_length = seq_length + num_tokens_to_generate
|
||||
batch_size = inputs["input_ids"].shape[0]
|
||||
|
||||
# Copied from the gpt-fast repo
|
||||
def multinomial_sample_one_no_sync(probs_sort): # Does multinomial sampling without a cuda synchronization
|
||||
q = torch.empty_like(probs_sort).exponential_(1)
|
||||
return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
|
||||
|
||||
def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None):
|
||||
logits = logits / max(temperature, 1e-5)
|
||||
|
||||
if top_k is not None:
|
||||
v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
|
||||
pivot = v.select(-1, -1).unsqueeze(-1)
|
||||
logits = torch.where(logits < pivot, -float("Inf"), logits)
|
||||
probs = torch.nn.functional.softmax(logits, dim=-1)
|
||||
return probs
|
||||
|
||||
def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None):
|
||||
probs = logits_to_probs(logits[0, -1], temperature, top_k)
|
||||
idx_next = multinomial_sample_one_no_sync(probs)
|
||||
return idx_next, probs
|
||||
|
||||
# First eager forward pass
|
||||
logger.info("running first eager forward pass")
|
||||
start = perf_counter()
|
||||
outputs = model(**inputs)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
first_eager_fwd_pass_time = end - start
|
||||
logger.info(f"completed first eager forward pass in: {first_eager_fwd_pass_time}s")
|
||||
|
||||
# Second eager forward pass (should be faster)
|
||||
logger.info("running second eager forward pass")
|
||||
start = perf_counter()
|
||||
outputs = model(**inputs)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
second_eager_fwd_pass_time = end - start
|
||||
logger.info(f"completed second eager forward pass in: {second_eager_fwd_pass_time}s")
|
||||
|
||||
# First eager generation
|
||||
logger.info("running first eager generation")
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
first_eager_generate_time = end - start
|
||||
logger.info(f"completed first eager generation in: {first_eager_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
# Second eager generation (should be faster)
|
||||
logger.info("running second eager generation")
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
second_eager_generate_time = end - start
|
||||
logger.info(f"completed second eager generation in: {second_eager_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
logger.info("running generation timing loop")
|
||||
|
||||
input_pos = torch.arange(0, seq_length, device=device)
|
||||
inputs = inputs["input_ids"]
|
||||
|
||||
start = perf_counter()
|
||||
with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
|
||||
logits = model(inputs, position_ids=input_pos).logits
|
||||
next_token, probs = sample(logits, temperature=0.6, top_k=5)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
time_to_first_token = end - start
|
||||
|
||||
input_pos = torch.tensor([seq_length], device=device, dtype=torch.int)
|
||||
next_token = next_token.clone()
|
||||
start = perf_counter()
|
||||
with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
|
||||
logits = model(next_token, position_ids=input_pos).logits
|
||||
next_token, probs = sample(logits, temperature=0.6, top_k=5)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
time_to_second_token = end - start
|
||||
|
||||
input_pos = torch.tensor([seq_length + 1], device=device, dtype=torch.int)
|
||||
next_token = next_token.clone()
|
||||
start = perf_counter()
|
||||
with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
|
||||
logits = model(next_token, position_ids=input_pos).logits
|
||||
next_token, probs = sample(logits, temperature=0.6, top_k=5)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
time_to_third_token = end - start
|
||||
|
||||
logger.info("running longer generation timing loop")
|
||||
|
||||
total_time = 0
|
||||
for i in range(20):
|
||||
input_pos = torch.tensor([seq_length + 2 + i], device=device, dtype=torch.int)
|
||||
next_token = next_token.clone()
|
||||
start = perf_counter()
|
||||
with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
|
||||
logits = model(next_token, position_ids=input_pos).logits
|
||||
next_token, probs = sample(logits, temperature=0.6, top_k=5)
|
||||
torch.cuda.synchronize()
|
||||
end = perf_counter()
|
||||
total_time += end - start
|
||||
|
||||
mean_time_to_next_token = total_time / 20
|
||||
|
||||
logger.info("running compilation benchmarks")
|
||||
|
||||
# Now compile the model
|
||||
model = torch.compile(model, mode="max-autotune", fullgraph=True)
|
||||
|
||||
# StaticCache for generation
|
||||
with torch.device(device):
|
||||
model.setup_caches(max_batch_size=batch_size, max_seq_len=seq_length + num_tokens_to_generate)
|
||||
|
||||
input_pos = torch.arange(0, seq_length, device=device)
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"]
|
||||
|
||||
logger.info("compiling model")
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16, generation_config=gen_config)
|
||||
model.to(device)
|
||||
model = torch.compile(model, mode="max-autotune", fullgraph=True)
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
)
|
||||
# 1st call
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs, past_key_values=past_key_values)
|
||||
end = perf_counter()
|
||||
first_compile_generate_time = end - start
|
||||
logger.info(f"completed first compile generation in: {first_compile_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
)
|
||||
# 2nd call
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs, past_key_values=past_key_values)
|
||||
end = perf_counter()
|
||||
second_compile_generate_time = end - start
|
||||
logger.info(f"completed second compile generation in: {second_compile_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
)
|
||||
# 3rd call
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs, past_key_values=past_key_values)
|
||||
end = perf_counter()
|
||||
third_compile_generate_time = end - start
|
||||
logger.info(f"completed third compile generation in: {third_compile_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
past_key_values = StaticCache(
|
||||
model.config,
|
||||
max_batch_size=batch_size,
|
||||
device=device,
|
||||
dtype=torch.float16,
|
||||
max_cache_len=seq_length + 128,
|
||||
)
|
||||
# 4th call
|
||||
start = perf_counter()
|
||||
output = model.generate(**inputs, past_key_values=past_key_values)
|
||||
end = perf_counter()
|
||||
fourth_compile_generate_time = end - start
|
||||
logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s")
|
||||
logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
|
||||
|
||||
metrics_recorder.collect_model_measurements(
|
||||
benchmark_id,
|
||||
{
|
||||
"model_load_time": model_load_time,
|
||||
"first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
|
||||
"second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
|
||||
"first_eager_generate_time_secs": first_eager_generate_time,
|
||||
"second_eager_generate_time_secs": second_eager_generate_time,
|
||||
"time_to_first_token_secs": time_to_first_token,
|
||||
"time_to_second_token_secs": time_to_second_token,
|
||||
"time_to_third_token_secs": time_to_third_token,
|
||||
"time_to_next_token_mean_secs": mean_time_to_next_token,
|
||||
"first_compile_generate_time_secs": first_compile_generate_time,
|
||||
"second_compile_generate_time_secs": second_compile_generate_time,
|
||||
"third_compile_generate_time_secs": third_compile_generate_time,
|
||||
"fourth_compile_generate_time_secs": fourth_compile_generate_time,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Caught exception: {e}")
|
||||
continue_metric_collection.set()
|
||||
if metrics_thread is not None:
|
||||
metrics_thread.join()
|
||||
|
||||
# Only close the recorder if we created it locally
|
||||
if should_close_recorder:
|
||||
metrics_recorder.close()
|
||||
@@ -1,35 +1,15 @@
|
||||
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import argparse
|
||||
import importlib.util
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Dict, Tuple, Optional, List
|
||||
from typing import Dict, Tuple
|
||||
|
||||
import pandas as pd
|
||||
from psycopg2.extensions import register_adapter
|
||||
from psycopg2.extras import Json
|
||||
|
||||
try:
|
||||
from psycopg2.extensions import register_adapter
|
||||
from psycopg2.extras import Json
|
||||
register_adapter(dict, Json)
|
||||
PSYCOPG2_AVAILABLE = True
|
||||
except ImportError:
|
||||
PSYCOPG2_AVAILABLE = False
|
||||
|
||||
register_adapter(dict, Json)
|
||||
|
||||
|
||||
class ImportModuleException(Exception):
|
||||
@@ -38,239 +18,61 @@ class ImportModuleException(Exception):
|
||||
|
||||
class MetricsRecorder:
|
||||
def __init__(
|
||||
self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str,
|
||||
collect_csv_data: bool = True
|
||||
self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str
|
||||
):
|
||||
self.conn = connection
|
||||
self.use_database = connection is not None
|
||||
if self.use_database:
|
||||
self.conn.autocommit = True
|
||||
self.conn.autocommit = True
|
||||
self.logger = logger
|
||||
self.repository = repository
|
||||
self.branch = branch
|
||||
self.commit_id = commit_id
|
||||
self.commit_msg = commit_msg
|
||||
self.collect_csv_data = collect_csv_data
|
||||
|
||||
# For CSV export - store all data in pandas DataFrames (only if CSV collection is enabled)
|
||||
if self.collect_csv_data:
|
||||
# Initialize empty DataFrames with proper schemas
|
||||
self.benchmarks_df = pd.DataFrame(columns=[
|
||||
'benchmark_id', 'repository', 'branch', 'commit_id', 'commit_message',
|
||||
'metadata', 'created_at'
|
||||
])
|
||||
self.device_measurements_df = pd.DataFrame(columns=[
|
||||
'benchmark_id', 'cpu_util', 'mem_megabytes', 'gpu_util',
|
||||
'gpu_mem_megabytes', 'time'
|
||||
])
|
||||
self.model_measurements_df = pd.DataFrame(columns=[
|
||||
'benchmark_id', 'time', 'model_load_time', 'first_eager_forward_pass_time_secs',
|
||||
'second_eager_forward_pass_time_secs', 'first_eager_generate_time_secs',
|
||||
'second_eager_generate_time_secs', 'time_to_first_token_secs',
|
||||
'time_to_second_token_secs', 'time_to_third_token_secs',
|
||||
'time_to_next_token_mean_secs', 'first_compile_generate_time_secs',
|
||||
'second_compile_generate_time_secs', 'third_compile_generate_time_secs',
|
||||
'fourth_compile_generate_time_secs'
|
||||
])
|
||||
else:
|
||||
self.benchmarks_df = None
|
||||
self.device_measurements_df = None
|
||||
self.model_measurements_df = None
|
||||
|
||||
def initialise_benchmark(self, metadata: dict[str, str]) -> str:
|
||||
def initialise_benchmark(self, metadata: dict[str, str]) -> int:
|
||||
"""
|
||||
Creates a new benchmark, returns the benchmark id (UUID)
|
||||
Creates a new benchmark, returns the benchmark id
|
||||
"""
|
||||
# Generate a unique UUID for this benchmark
|
||||
benchmark_id = str(uuid.uuid4())
|
||||
|
||||
if self.use_database:
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"INSERT INTO benchmarks (benchmark_id, repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s, %s)",
|
||||
(benchmark_id, self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
|
||||
)
|
||||
self.logger.debug(f"initialised benchmark #{benchmark_id}")
|
||||
|
||||
# Store benchmark data for CSV export (if enabled)
|
||||
if self.collect_csv_data:
|
||||
# Add row to pandas DataFrame
|
||||
new_row = pd.DataFrame([{
|
||||
'benchmark_id': benchmark_id,
|
||||
'repository': self.repository,
|
||||
'branch': self.branch,
|
||||
'commit_id': self.commit_id,
|
||||
'commit_message': self.commit_msg,
|
||||
'metadata': json.dumps(metadata),
|
||||
'created_at': datetime.utcnow().isoformat()
|
||||
}])
|
||||
self.benchmarks_df = pd.concat([self.benchmarks_df, new_row], ignore_index=True)
|
||||
|
||||
mode_info = []
|
||||
if self.use_database:
|
||||
mode_info.append("database")
|
||||
if self.collect_csv_data:
|
||||
mode_info.append("CSV")
|
||||
mode_str = " + ".join(mode_info) if mode_info else "no storage"
|
||||
|
||||
self.logger.debug(f"initialised benchmark #{benchmark_id} ({mode_str} mode)")
|
||||
return benchmark_id
|
||||
# gpu_name: str, model_id: str
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"INSERT INTO benchmarks (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id",
|
||||
(self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
|
||||
)
|
||||
benchmark_id = cur.fetchone()[0]
|
||||
logger.debug(f"initialised benchmark #{benchmark_id}")
|
||||
return benchmark_id
|
||||
|
||||
def collect_device_measurements(self, benchmark_id: str, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
|
||||
def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
|
||||
"""
|
||||
Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
|
||||
"""
|
||||
# Store device measurements for CSV export (if enabled)
|
||||
if self.collect_csv_data:
|
||||
# Add row to pandas DataFrame
|
||||
new_row = pd.DataFrame([{
|
||||
'benchmark_id': benchmark_id,
|
||||
'cpu_util': cpu_util,
|
||||
'mem_megabytes': mem_megabytes,
|
||||
'gpu_util': gpu_util,
|
||||
'gpu_mem_megabytes': gpu_mem_megabytes,
|
||||
'time': datetime.utcnow().isoformat()
|
||||
}])
|
||||
self.device_measurements_df = pd.concat([self.device_measurements_df, new_row], ignore_index=True)
|
||||
|
||||
# Store in database if available
|
||||
if self.use_database:
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
|
||||
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
|
||||
)
|
||||
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
|
||||
(benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
|
||||
)
|
||||
self.logger.debug(
|
||||
f"collected device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
|
||||
f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
|
||||
)
|
||||
|
||||
def collect_model_measurements(self, benchmark_id: str, measurements: dict[str, float]):
|
||||
# Store model measurements for CSV export (if enabled)
|
||||
if self.collect_csv_data:
|
||||
# Add row to pandas DataFrame with flattened measurements
|
||||
row_data = {
|
||||
'benchmark_id': benchmark_id,
|
||||
'time': datetime.utcnow().isoformat()
|
||||
}
|
||||
# Flatten the measurements dict into the row
|
||||
row_data.update(measurements)
|
||||
|
||||
new_row = pd.DataFrame([row_data])
|
||||
self.model_measurements_df = pd.concat([self.model_measurements_df, new_row], ignore_index=True)
|
||||
|
||||
# Store in database if available
|
||||
if self.use_database:
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO model_measurements (
|
||||
benchmark_id,
|
||||
measurements
|
||||
) VALUES (%s, %s)
|
||||
""",
|
||||
(
|
||||
benchmark_id,
|
||||
measurements,
|
||||
),
|
||||
)
|
||||
|
||||
self.logger.debug(f"collected model measurements for benchmark #{benchmark_id}: {measurements}")
|
||||
|
||||
def export_to_csv(self, output_dir: str = "benchmark_results"):
|
||||
"""
|
||||
Export all collected data to CSV files using pandas DataFrames
|
||||
"""
|
||||
if not self.collect_csv_data:
|
||||
self.logger.warning("CSV data collection is disabled - no CSV files will be generated")
|
||||
return
|
||||
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
self.logger.info(f"Created output directory: {output_dir}")
|
||||
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
files_created = []
|
||||
|
||||
# Export using pandas DataFrames
|
||||
self._export_pandas_data(output_dir, timestamp, files_created)
|
||||
|
||||
self.logger.info(f"CSV export complete! Created {len(files_created)} files in {output_dir}")
|
||||
|
||||
def _export_pandas_data(self, output_dir: str, timestamp: str, files_created: list):
|
||||
"""
|
||||
Export CSV files using pandas DataFrames
|
||||
"""
|
||||
# Export benchmarks
|
||||
benchmarks_file = os.path.join(output_dir, f"benchmarks_{timestamp}.csv")
|
||||
self.benchmarks_df.to_csv(benchmarks_file, index=False)
|
||||
files_created.append(benchmarks_file)
|
||||
self.logger.info(f"Exported {len(self.benchmarks_df)} benchmark records to {benchmarks_file}")
|
||||
|
||||
# Export device measurements
|
||||
device_file = os.path.join(output_dir, f"device_measurements_{timestamp}.csv")
|
||||
self.device_measurements_df.to_csv(device_file, index=False)
|
||||
files_created.append(device_file)
|
||||
self.logger.info(f"Exported {len(self.device_measurements_df)} device measurement records to {device_file}")
|
||||
|
||||
# Export model measurements (already flattened)
|
||||
model_file = os.path.join(output_dir, f"model_measurements_{timestamp}.csv")
|
||||
self.model_measurements_df.to_csv(model_file, index=False)
|
||||
files_created.append(model_file)
|
||||
self.logger.info(f"Exported {len(self.model_measurements_df)} model measurement records to {model_file}")
|
||||
|
||||
# Create comprehensive summary using pandas operations
|
||||
summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.csv")
|
||||
self._create_summary(summary_file)
|
||||
files_created.append(summary_file)
|
||||
|
||||
def _create_summary(self, summary_file: str):
|
||||
"""
|
||||
Create a comprehensive summary CSV using pandas operations
|
||||
"""
|
||||
if len(self.benchmarks_df) == 0:
|
||||
# Create empty summary file
|
||||
summary_df = pd.DataFrame()
|
||||
summary_df.to_csv(summary_file, index=False)
|
||||
self.logger.info(f"Created empty benchmark summary at {summary_file}")
|
||||
return
|
||||
|
||||
# Start with benchmarks as the base
|
||||
summary_df = self.benchmarks_df.copy()
|
||||
|
||||
# Add model measurements (join on benchmark_id)
|
||||
if len(self.model_measurements_df) > 0:
|
||||
# Drop 'time' column from model measurements to avoid conflicts
|
||||
model_df = self.model_measurements_df.drop(columns=['time'], errors='ignore')
|
||||
summary_df = summary_df.merge(model_df, on='benchmark_id', how='left')
|
||||
|
||||
# Calculate device measurement aggregates using pandas groupby
|
||||
if len(self.device_measurements_df) > 0:
|
||||
device_agg = self.device_measurements_df.groupby('benchmark_id').agg({
|
||||
'cpu_util': ['mean', 'max', 'std', 'count'],
|
||||
'mem_megabytes': ['mean', 'max', 'std'],
|
||||
'gpu_util': ['mean', 'max', 'std'],
|
||||
'gpu_mem_megabytes': ['mean', 'max', 'std']
|
||||
}).round(3)
|
||||
|
||||
# Flatten column names
|
||||
device_agg.columns = [f"{col[0]}_{col[1]}" for col in device_agg.columns]
|
||||
device_agg = device_agg.reset_index()
|
||||
|
||||
# Rename count column to be more descriptive
|
||||
if 'cpu_util_count' in device_agg.columns:
|
||||
device_agg = device_agg.rename(columns={'cpu_util_count': 'device_measurement_count'})
|
||||
|
||||
# Merge with summary
|
||||
summary_df = summary_df.merge(device_agg, on='benchmark_id', how='left')
|
||||
|
||||
# Export the comprehensive summary
|
||||
summary_df.to_csv(summary_file, index=False)
|
||||
self.logger.info(f"Created comprehensive benchmark summary with {len(summary_df)} records at {summary_file}")
|
||||
def collect_model_measurements(self, benchmark_id: int, measurements: dict[str, float]):
|
||||
with self.conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO model_measurements (
|
||||
benchmark_id,
|
||||
measurements
|
||||
) VALUES (%s, %s)
|
||||
""",
|
||||
(
|
||||
benchmark_id,
|
||||
measurements,
|
||||
),
|
||||
)
|
||||
self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")
|
||||
|
||||
def close(self):
|
||||
if self.use_database and self.conn:
|
||||
self.conn.close()
|
||||
self.conn.close()
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -283,7 +85,7 @@ handler.setFormatter(formatter)
|
||||
logger.addHandler(handler)
|
||||
|
||||
|
||||
def parse_arguments() -> tuple[str, str, str, str, bool, str]:
|
||||
def parse_arguments() -> tuple[str, str, str, str]:
|
||||
"""
|
||||
Parse command line arguments for the benchmarking CLI.
|
||||
"""
|
||||
@@ -312,27 +114,10 @@ def parse_arguments() -> tuple[str, str, str, str, bool, str]:
|
||||
type=str,
|
||||
help="The commit message associated with the commit, truncated to 70 characters.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--csv",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Enable CSV output files generation."
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--csv-output-dir",
|
||||
type=str,
|
||||
default="benchmark_results",
|
||||
help="Directory for CSV output files (default: benchmark_results)."
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# CSV is disabled by default, only enabled when --csv is used
|
||||
generate_csv = args.csv
|
||||
|
||||
return args.repository, args.branch, args.commit_id, args.commit_msg, generate_csv, args.csv_output_dir
|
||||
return args.repository, args.branch, args.commit_id, args.commit_msg
|
||||
|
||||
|
||||
def import_from_path(module_name, file_path):
|
||||
@@ -346,124 +131,22 @@ def import_from_path(module_name, file_path):
|
||||
raise ImportModuleException(f"failed to load python module: {e}")
|
||||
|
||||
|
||||
def create_database_connection():
|
||||
"""
|
||||
Try to create a database connection. Returns None if connection fails.
|
||||
"""
|
||||
if not PSYCOPG2_AVAILABLE:
|
||||
logger.warning("psycopg2 not available - running in CSV-only mode")
|
||||
return None
|
||||
|
||||
try:
|
||||
import psycopg2
|
||||
conn = psycopg2.connect("dbname=metrics")
|
||||
logger.info("Successfully connected to database")
|
||||
return conn
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to connect to database: {e}. Running in CSV-only mode")
|
||||
return None
|
||||
|
||||
|
||||
def create_global_metrics_recorder(repository: str, branch: str, commit_id: str, commit_msg: str,
|
||||
generate_csv: bool = False) -> MetricsRecorder:
|
||||
"""
|
||||
Create a global metrics recorder that will be used across all benchmarks.
|
||||
"""
|
||||
connection = create_database_connection()
|
||||
recorder = MetricsRecorder(connection, logger, repository, branch, commit_id, commit_msg, generate_csv)
|
||||
|
||||
# Log the storage mode
|
||||
storage_modes = []
|
||||
if connection is not None:
|
||||
storage_modes.append("database")
|
||||
if generate_csv:
|
||||
storage_modes.append("CSV")
|
||||
|
||||
if not storage_modes:
|
||||
logger.warning("Running benchmarks with NO data storage (no database connection, CSV disabled)")
|
||||
logger.warning("Use --csv flag to enable CSV output when database is unavailable")
|
||||
else:
|
||||
logger.info(f"Running benchmarks with: {' + '.join(storage_modes)} storage")
|
||||
|
||||
return recorder
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
|
||||
benches_folder_path = os.path.join(benchmarks_folder_path, "benches")
|
||||
|
||||
repository, branch, commit_id, commit_msg, generate_csv, csv_output_dir = parse_arguments()
|
||||
|
||||
# Create a global metrics recorder
|
||||
global_metrics_recorder = create_global_metrics_recorder(repository, branch, commit_id, commit_msg, generate_csv)
|
||||
|
||||
successful_benchmarks = 0
|
||||
failed_benchmarks = 0
|
||||
|
||||
# Automatically discover all benchmark modules in benches/ folder
|
||||
benchmark_modules = []
|
||||
|
||||
if os.path.exists(benches_folder_path):
|
||||
logger.debug(f"Scanning for benchmarks in: {benches_folder_path}")
|
||||
for entry in os.scandir(benches_folder_path):
|
||||
repository, branch, commit_id, commit_msg = parse_arguments()
|
||||
|
||||
for entry in os.scandir(benchmarks_folder_path):
|
||||
try:
|
||||
if not entry.name.endswith(".py"):
|
||||
continue
|
||||
if entry.name.startswith("__"): # Skip __init__.py, __pycache__, etc.
|
||||
if entry.path == __file__:
|
||||
continue
|
||||
|
||||
# Check if the file has a run_benchmark function
|
||||
try:
|
||||
logger.debug(f"checking if benches/{entry.name} has run_benchmark function")
|
||||
module = import_from_path(entry.name.split(".")[0], entry.path)
|
||||
if hasattr(module, 'run_benchmark'):
|
||||
benchmark_modules.append(entry.name)
|
||||
logger.debug(f"discovered benchmark: {entry.name}")
|
||||
else:
|
||||
logger.debug(f"skipping {entry.name} - no run_benchmark function found")
|
||||
except Exception as e:
|
||||
logger.debug(f"failed to check benches/{entry.name}: {e}")
|
||||
else:
|
||||
logger.warning(f"Benches directory not found: {benches_folder_path}")
|
||||
|
||||
if benchmark_modules:
|
||||
logger.info(f"Discovered {len(benchmark_modules)} benchmark(s): {benchmark_modules}")
|
||||
else:
|
||||
logger.warning("No benchmark modules found in benches/ directory")
|
||||
|
||||
for module_name in benchmark_modules:
|
||||
module_path = os.path.join(benches_folder_path, module_name)
|
||||
try:
|
||||
logger.debug(f"loading: {module_name}")
|
||||
module = import_from_path(module_name.split(".")[0], module_path)
|
||||
logger.info(f"running benchmarks in: {module_name}")
|
||||
|
||||
# Check if the module has an updated run_benchmark function that accepts metrics_recorder
|
||||
try:
|
||||
# Try the new signature first
|
||||
module.run_benchmark(logger, repository, branch, commit_id, commit_msg, global_metrics_recorder)
|
||||
except TypeError:
|
||||
# Fall back to the old signature for backward compatibility
|
||||
logger.warning(f"Module {module_name} using old run_benchmark signature - database connection will be created per module")
|
||||
module.run_benchmark(logger, repository, branch, commit_id, commit_msg)
|
||||
|
||||
successful_benchmarks += 1
|
||||
logger.debug(f"loading: {entry.name}")
|
||||
module = import_from_path(entry.name.split(".")[0], entry.path)
|
||||
logger.info(f"running benchmarks in: {entry.name}")
|
||||
module.run_benchmark(logger, repository, branch, commit_id, commit_msg)
|
||||
except ImportModuleException as e:
|
||||
logger.error(e)
|
||||
failed_benchmarks += 1
|
||||
except Exception as e:
|
||||
logger.error(f"error running benchmarks for {module_name}: {e}")
|
||||
failed_benchmarks += 1
|
||||
|
||||
# Export CSV results at the end (if enabled)
|
||||
try:
|
||||
if generate_csv:
|
||||
global_metrics_recorder.export_to_csv(csv_output_dir)
|
||||
logger.info(f"CSV reports have been generated and saved to the {csv_output_dir} directory")
|
||||
else:
|
||||
logger.info("CSV generation disabled - no CSV files created (use --csv to enable)")
|
||||
|
||||
logger.info(f"Benchmark run completed. Successful: {successful_benchmarks}, Failed: {failed_benchmarks}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to export CSV results: {e}")
|
||||
finally:
|
||||
global_metrics_recorder.close()
|
||||
logger.error(f"error running benchmarks for {entry.name}: {e}")
|
||||
|
||||
@@ -19,7 +19,7 @@ backend:
|
||||
model: meta-llama/Llama-2-7b-hf
|
||||
cache_implementation: static
|
||||
torch_compile: true
|
||||
dtype: float16
|
||||
torch_dtype: float16
|
||||
torch_compile_config:
|
||||
backend: inductor
|
||||
mode: reduce-overhead
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user