* refactor: benchmarks Based on a discussion with @LysandreJik & @ArthurZucker, the goal of this PR is to improve transformers' benchmark system. This is a WIP, for the moment the infrastructure required to make things work is not ready. Will update the PR description when it is the case. * feat: add db init in benchmarks CI * fix: pg_config is missing in runner * fix: add psql to the runner * fix: connect info from env vars + PR comments * refactor: set database as env var * fix: invalid working directory * fix: `commit_msg` -> `commit_message` * fix: git marking checked out repo as unsafe * feat: add logging * fix: invalid device * feat: update grafana dashboard for prod grafana * feat: add `commit_id` to header table * feat: commit latest version of dashboard * feat: move measurements into json field * feat: remove drop table migration queries * fix: `torch.arrange` -> `torch.arange` * fix: add missing `s` to `cache_position` positional argument * fix: change model * revert: `cache_positions` -> `cache_position` * fix: set device for `StaticCache` * fix: set `StaticCache` dtype * feat: limit max cache len * fix script * raise error on failure! * not try catch * try to skip generate compilation * update * update docker image! * update * update again!@ * update * updates * ??? * ?? * use `torch.cuda.synchronize()` * fix json * nits * fix * fixed! * f**k * feat: add TTNT panels * feat: add try except --------- Co-authored-by: Arthur Zucker <arthur.zucker@gmail.com>
73 lines
2.4 KiB
YAML
73 lines
2.4 KiB
YAML
# Benchmark workflow: runs on pushes to main and on pull request activity.
name: Self-hosted runner (benchmark)

on:
  push:
    branches:
      - main
  pull_request:
    types:
      - opened
      - labeled
      - reopened
      - synchronize

# Runs that share a group cancel each other. Pull request runs are grouped by
# their head branch; other runs fall back to the run id, which is unique per
# run, so they never cancel one another.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

# Exported to every step of every job in this workflow.
env:
  HF_HOME: /mnt/cache

jobs:
  # Initialises the metrics database, then runs benchmark/llama.py inside a
  # GPU container, passing it the branch name, commit id and commit subject.
  benchmark:
    name: Benchmark
    runs-on:
      group: aws-g5-4xlarge-cache
    container:
      image: huggingface/transformers-pytorch-gpu
      options: --gpus all --privileged --ipc host
    steps:
      # Pull request runs check out the PR head commit. On push events the
      # pull_request payload is absent, so the expression falls back to the
      # pushed commit.
      - name: Get repo
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha || github.sha }}

      # apt-get rather than apt: apt does not offer a stable CLI for scripts.
      - name: Install libpq-dev & psql
        run: |
          apt-get update
          apt-get install -y libpq-dev postgresql-client

      - name: Install benchmark script dependencies
        run: python3 -m pip install -r benchmark/requirements.txt

      # NOTE(review): this installs from /transformers (a path inside the
      # container image), not from the workspace checked out above. Confirm
      # that directory holds the revision that is meant to be benchmarked.
      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e ".[torch]"

      # psql takes its connection parameters from the PG* variables below.
      - name: Run database init script
        run: |
          psql -f benchmark/init_db.sql
        env:
          PGDATABASE: metrics
          PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
          PGUSER: transformers_benchmarks
          PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}

      # BRANCH_NAME and COMMIT_ID are passed through the environment instead
      # of being expanded into the script text: a pull request branch name is
      # chosen by the PR author, and expanding it inline would let it inject
      # shell commands.
      # NOTE(review): PGDATABASE is not set for this step, unlike the init
      # step. Confirm benchmark/llama.py selects the database itself.
      - name: Run benchmark
        run: |
          # Without this, git refuses to operate on the checked-out repo
          # because it is flagged as unsafe.
          git config --global --add safe.directory /__w/transformers/transformers
          # Subject line of the checked-out commit, truncated to 70 characters.
          commit_msg=$(git show -s --format=%s | cut -c1-70)
          python3 benchmark/llama.py "$BRANCH_NAME" "$COMMIT_ID" "$commit_msg"
        env:
          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
          COMMIT_ID: ${{ github.event.pull_request.head.sha || github.sha }}
          HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
          PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
          PGUSER: transformers_benchmarks
          PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}