Fix quantization tests (#29914)

* revert back to torch 2.1.1 * run test * switch to torch 2.2.1 * update dockerfile * fix awq tests * fix test * run quanto tests * update tests * split quantization tests * fix * fix again * final fix * fix report artifact * build docker again * Revert "build docker again" This reverts commit 399a5f9d9308da071d79034f238c719de0f3532e. * debug * revert * style * new notification system * testing notification * rebuild docker * fix_prev_ci_results * typo * remove warning * fix typo * fix artifact name * debug * issue fixed * debug again * fix * fix time * test notif with failing test * typo * issues again * final fix ? * run all quantization tests again * remove name to clear space * revert modification done on workflow * fix * build docker * build only quant docker * fix quantization ci * fix * fix report * better quantization_matrix * add print * revert to the basic one
2024-04-09 17:10:29 +02:00
parent 6487e9b370
commit 58a939c6b7
7 changed files with 324 additions and 30 deletions
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -33,7 +33,6 @@ env:

 jobs:
  setup:
-    if: ${{ inputs.job == 'run_tests_gpu' }}
    name: Setup
    strategy:
      matrix:
@@ -45,6 +44,7 @@ jobs:
    outputs:
      folder_slices: ${{ steps.set-matrix.outputs.folder_slices }}
      slice_ids: ${{ steps.set-matrix.outputs.slice_ids }}
+      quantization_matrix: ${{ steps.set-matrix-quantization.outputs.quantization_matrix }}
    steps:
      - name: Update clone
        working-directory: /transformers
@@ -63,11 +63,19 @@ jobs:
        run: pip freeze

      - id: set-matrix
+        if: ${{ inputs.job == 'run_tests_gpu' }}
        name: Identify models to test
        working-directory: /transformers/tests
        run: |
          echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT
          echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT
+      
+      - id: set-matrix-quantization
+        if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
+        name: Identify quantization method to test
+        working-directory: /transformers/tests
+        run: |
+          echo "quantization_matrix=$(python3 -c 'import os; tests = os.getcwd(); quantization_tests = os.listdir(os.path.join(tests, "quantization")); d = sorted(list(filter(os.path.isdir, [f"quantization/{x}" for x in quantization_tests]))) ;  print(d)')" >> $GITHUB_OUTPUT

      - name: NVIDIA-SMI
        run: |
@@ -303,16 +311,26 @@ jobs:

  run_tests_quantization_torch_gpu:
    if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
-    name: Quantization tests
+    name: " "
    strategy:
      fail-fast: false
      matrix:
+        folders: ${{ fromJson(needs.setup.outputs.quantization_matrix) }}
        machine_type: [single-gpu, multi-gpu]
    runs-on: ['${{ matrix.machine_type }}', nvidia-gpu, t4, daily-ci]
    container:
      image: huggingface/transformers-quantization-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'quantization/'/'quantization_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
      - name: Update clone
        working-directory: /transformers
        run: git fetch && git checkout ${{ github.sha }}
@@ -337,19 +355,19 @@ jobs:
      - name: Run quantization tests on GPU
        working-directory: /transformers
        run: |
-          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu tests/quantization
+          python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
-        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu/failures_short.txt
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}/failures_short.txt

-      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu"
+      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}"
        if: ${{ always() }}
        uses: actions/upload-artifact@v3
        with:
-          name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu
-          path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu
+          name: ${{ matrix.machine_type }}_run_tests_quantization_torch_gpu_${{ env.matrix_folders }}
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_quantization_torch_gpu_${{ matrix.folders }}

  run_extract_warnings:
    # Let's only do this for the job `run_tests_gpu` to simplify the (already complex) logic.
@@ -413,4 +431,6 @@ jobs:
      slack_report_channel: ${{ inputs.slack_report_channel }}
      # This would be an empty string if `setup` is skipped.
      folder_slices: ${{ needs.setup.outputs.folder_slices }}
-    secrets: inherit
+      quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
+      
+    secrets: inherit
--- a/.github/workflows/slack-report.yml
+++ b/.github/workflows/slack-report.yml
@@ -15,6 +15,9 @@ on:
      folder_slices:
        required: true
        type: string
+      quantization_matrix:
+        required: true
+        type: string


 jobs:
@@ -32,6 +35,7 @@ jobs:
      - uses: actions/checkout@v3
      - uses: actions/download-artifact@v3
      - name: Send message to Slack
+        if: ${{ inputs.job != 'run_tests_quantization_torch_gpu' }}
        env:
          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
@@ -53,7 +57,26 @@ jobs:
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "${{ inputs.folder_slices }}"
-
+      
+      - uses: actions/checkout@v3
+      - uses: actions/download-artifact@v3
+      - name: Send message to Slack for quantization workflow
+        if: ${{ inputs.job == 'run_tests_quantization_torch_gpu' }}
+        env:
+          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
+          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
+          SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
+          CI_EVENT: scheduled
+          CI_SHA: ${{ github.sha }}
+          SETUP_STATUS: ${{ inputs.setup_status }}
+        # We pass the quantization matrix (`inputs.quantization_matrix`) as the argument. `notification_service_quantization.py` must convert
+        # entries such as `quantization/bnb` to `quantization_bnb`, because the artifact names use `_` instead of `/`.
+        run: |
+          sudo apt-get install -y curl
+          pip install slack_sdk
+          pip show slack_sdk
+          python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}" 
+  
      # Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
      - name: Failure table artifacts
        # Only the model testing job is concerned for this step