Fix quantization tests (#29914)
* revert back to torch 2.1.1 * run test * switch to torch 2.2.1 * udapte dockerfile * fix awq tests * fix test * run quanto tests * update tests * split quantization tests * fix * fix again * final fix * fix report artifact * build docker again * Revert "build docker again" This reverts commit 399a5f9d9308da071d79034f238c719de0f3532e. * debug * revert * style * new notification system * testing notfication * rebuild docker * fix_prev_ci_results * typo * remove warning * fix typo * fix artifact name * debug * issue fixed * debug again * fix * fix time * test notif with faling test * typo * issues again * final fix ? * run all quantization tests again * remove name to clear space * revert modfiication done on workflow * fix * build docker * build only quant docker * fix quantization ci * fix * fix report * better quantization_matrix * add print * revert to the basic one
This commit is contained in:
36
.github/workflows/self-scheduled.yml
vendored
36
.github/workflows/self-scheduled.yml
vendored
@@ -33,7 +33,6 @@ env:
|
||||
|
||||
jobs:
|
||||
setup:
|
||||
if: ${{ inputs.job == 'run_tests_gpu' }}
|
||||
name: Setup
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -45,6 +44,7 @@ jobs:
|
||||
outputs:
|
||||
folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
|
||||
slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
|
||||
quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
@@ -63,11 +63,19 @@ jobs:
|
||||
run: pip freeze
|
||||
|
||||
- id: set-matrix
|
||||
if: ${{ inputs.job == 'run_tests_gpu' }}
|
||||
name: Identify models to test
|
||||
working-directory: /transformers/tests
|
||||
run: |
|
||||
echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
|
||||
echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
|
||||
|
||||
- id: set-matrix-quantization
|
||||
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
|
||||
name: Identify quantization method to test
|
||||
working-directory: /transformers/tests
|
||||
run: |
|
||||
echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ; print(d)')" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
@@ -303,16 +311,26 @@ jobs:
|
||||
|
||||
run_tests_quantization_torch_gpu:
|
||||
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
|
||||
name: Quantization tests
|
||||
name: " "
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
|
||||
container:
|
||||
image: huggingface/transformers-quantization-latest-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Echo folder ${{ matrix.folders }}
|
||||
shell: bash
|
||||
run: |
|
||||
echo "${{ matrix.folders }}"
|
||||
matrix_folders=${{ matrix.folders }}
|
||||
matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
|
||||
echo "$matrix_folders"
|
||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
@@ -337,19 +355,19 @@ jobs:
|
||||
- name: Run quantization tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization
|
||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}/failures_short.txt
|
||||
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu"
|
||||
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}"
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu
|
||||
name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}
|
||||
|
||||
run_extract_warnings:
|
||||
# Let's only do this for the job `run_tests_gpu` to simplify the (already complex) logic.
|
||||
@@ -413,4 +431,6 @@ jobs:
|
||||
slack_report_channel: ${{ inputs.slack_report_channel }}
|
||||
# This would be an empty string if `setup` is skipped.
|
||||
folder_slices: ${{ needs.setup.outputs.folder_slices }}
|
||||
secrets: inherit
|
||||
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
|
||||
|
||||
secrets: inherit
|
||||
|
||||
25
.github/workflows/slack-report.yml
vendored
25
.github/workflows/slack-report.yml
vendored
@@ -15,6 +15,9 @@ on:
|
||||
folder_slices:
|
||||
required: true
|
||||
type: string
|
||||
quantization_matrix:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
|
||||
jobs:
|
||||
@@ -32,6 +35,7 @@ jobs:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/download-artifact@v3
|
||||
- name: Send message to Slack
|
||||
if: ${{ inputs.job != 'run_tests_quantization_torch_gpu' }}
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||
@@ -53,7 +57,26 @@ jobs:
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service.py "${{ inputs.folder_slices }}"
|
||||
|
||||
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/download-artifact@v3
|
||||
- name: Send message to Slack for quantization workflow
|
||||
if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
|
||||
env:
|
||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
|
||||
CI_EVENT: scheduled
|
||||
CI_SHA: ${{ github.sha }}
|
||||
SETUP_STATUS: ${{ inputs.setup_status }}
|
||||
# We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
|
||||
# `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
|
||||
run: |
|
||||
sudo apt-get install -y curl
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}"
|
||||
|
||||
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||
- name: Failure table artifacts
|
||||
# Only the model testing job is concerned for this step
|
||||
|
||||
Reference in New Issue
Block a user