Fix quantization tests (#29914)

* revert back to torch 2.1.1

* run test

* switch to torch 2.2.1

* udapte dockerfile

* fix awq tests

* fix test

* run quanto tests

* update tests

* split quantization tests

* fix

* fix again

* final fix

* fix report artifact

* build docker again

* Revert "build docker again"

This reverts commit 399a5f9d9308da071d79034f238c719de0f3532e.

* debug

* revert

* style

* new notification system

* testing notfication

* rebuild docker

* fix_prev_ci_results

* typo

* remove warning

* fix typo

* fix artifact name

* debug

* issue fixed

* debug again

* fix

* fix time

* test notif with faling test

* typo

* issues again

* final fix ?

* run all quantization tests again

* remove name to clear space

* revert modfiication done on workflow

* fix

* build docker

* build only quant docker

* fix quantization ci

* fix

* fix report

* better quantization_matrix

* add print

* revert to the basic one
This commit is contained in:
Marc Sun
2024-04-09 17:10:29 +02:00
committed by GitHub
parent 6487e9b370
commit 58a939c6b7
7 changed files with 324 additions and 30 deletions

View File

@@ -33,7 +33,6 @@ env:
jobs:
setup:
if: ${{ inputs.job == 'run_tests_gpu' }}
name: Setup
strategy:
matrix:
@@ -45,6 +44,7 @@ jobs:
outputs:
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
steps:
- name: Update clone
working-directory: /transformers
@@ -63,11 +63,19 @@ jobs:
run: pip freeze
- id: set-matrix
if: ${{ inputs.job == 'run_tests_gpu' }}
name: Identify models to test
working-directory: /transformers/tests
run: |
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
- id: set-matrix-quantization
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
name: Identify quantization method to test
working-directory: /transformers/tests
run: |
echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ; print(d)')" >> $GITHUB_OUTPUT
- name: NVIDIA-SMI
run: |
@@ -303,16 +311,26 @@ jobs:
run_tests_quantization_torch_gpu:
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
name: Quantization tests
name: " "
strategy:
fail-fast: false
matrix:
folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
machine_type: [single-gpu, multi-gpu]
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
container:
image: huggingface/transformers-quantization-latest-gpu
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Echo folder ${{ matrix.folders }}
shell: bash
run: |
echo "${{ matrix.folders }}"
matrix_folders=${{ matrix.folders }}
matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
echo "$matrix_folders"
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
@@ -337,19 +355,19 @@ jobs:
- name: Run quantization tests on GPU
working-directory: /transformers
run: |
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
- name: Failure short reports
if: ${{ failure() }}
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}/failures_short.txt
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu"
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu
path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu
name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}
path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}
run_extract_warnings:
# Let's only do this for the job `run_tests_gpu` to simplify the (already complex) logic.
@@ -413,4 +431,6 @@ jobs:
slack_report_channel: ${{ inputs.slack_report_channel }}
# This would be an empty string if `setup` is skipped.
folder_slices: ${{ needs.setup.outputs.folder_slices }}
secrets: inherit
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
secrets: inherit

View File

@@ -15,6 +15,9 @@ on:
folder_slices:
required: true
type: string
quantization_matrix:
required: true
type: string
jobs:
@@ -32,6 +35,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- name: Send message to Slack
if: ${{ inputs.job != 'run_tests_quantization_torch_gpu' }}
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
@@ -53,7 +57,26 @@ jobs:
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ inputs.folder_slices }}"
- uses: actions/checkout@v3
- uses: actions/download-artifact@v3
- name: Send message to Slack for quantization workflow
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
env:
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
CI_EVENT: scheduled
CI_SHA: ${{ github.sha }}
SETUP_STATUS: ${{ inputs.setup_status }}
# We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
# `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
run: |
sudo apt-get install -y curl
pip install slack_sdk
pip show slack_sdk
python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
- name: Failure table artifacts
# Only the model testing job is concerned for this step