Fix missing job action button in CI report (#17270)
* use matrix.machine_type
* fix job names used in job_link

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
56
.github/workflows/self-push.yml
vendored
56
.github/workflows/self-push.yml
vendored
@@ -73,7 +73,7 @@ jobs:
|
|||||||
echo "::set-output name=test_map::$test_map"
|
echo "::set-output name=test_map::$test_map"
|
||||||
|
|
||||||
run_tests_single_gpu:
|
run_tests_single_gpu:
|
||||||
name: Model Tests on single GPU
|
name: Model tests
|
||||||
needs: setup
|
needs: setup
|
||||||
# `dummy` means there is no test to run
|
# `dummy` means there is no test to run
|
||||||
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
||||||
@@ -81,8 +81,8 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||||
machines: [single-gpu]
|
machine_type: [single-gpu]
|
||||||
runs-on: [self-hosted, docker-gpu, '${{ matrix.machines }}']
|
runs-on: [self-hosted, docker-gpu, '${{ matrix.machine_type }}']
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-all-latest-gpu
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@@ -117,22 +117,22 @@ jobs:
|
|||||||
- name: Run all non-slow selected tests on GPU
|
- name: Run all non-slow selected tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machines }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
|
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machines }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||||
path: /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}
|
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||||
|
|
||||||
run_tests_multi_gpu:
|
run_tests_multi_gpu:
|
||||||
name: Model Tests on multi GPUs
|
name: Model tests
|
||||||
needs: setup
|
needs: setup
|
||||||
# `dummy` means there is no test to run
|
# `dummy` means there is no test to run
|
||||||
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
||||||
@@ -140,8 +140,8 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||||
machines: [multi-gpu]
|
machine_type: [multi-gpu]
|
||||||
runs-on: [self-hosted, docker-gpu, '${{ matrix.machines }}']
|
runs-on: [self-hosted, docker-gpu, '${{ matrix.machine_type }}']
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-all-latest-gpu
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@@ -178,29 +178,29 @@ jobs:
|
|||||||
MKL_SERVICE_FORCE_INTEL: 1
|
MKL_SERVICE_FORCE_INTEL: 1
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machines }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
|
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machines }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||||
path: /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}
|
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||||
|
|
||||||
run_tests_torch_cuda_extensions_single_gpu:
|
run_tests_torch_cuda_extensions_single_gpu:
|
||||||
name: Torch CUDA extension tests on single GPU
|
name: Torch CUDA extension tests
|
||||||
needs: setup
|
needs: setup
|
||||||
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
machines: [single-gpu]
|
machine_type: [single-gpu]
|
||||||
runs-on: [self-hosted, docker-gpu, '${{ matrix.machines }}']
|
runs-on: [self-hosted, docker-gpu, '${{ matrix.machine_type }}']
|
||||||
container:
|
container:
|
||||||
image: nvcr.io/nvidia/pytorch:21.03-py3
|
image: nvcr.io/nvidia/pytorch:21.03-py3
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@@ -227,29 +227,29 @@ jobs:
|
|||||||
- name: Run all non-slow selected tests on GPU
|
- name: Run all non-slow selected tests on GPU
|
||||||
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
||||||
run: |
|
run: |
|
||||||
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machines }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat reports/${{ matrix.machines }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
run: cat reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machines }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||||
path: reports/${{ matrix.machines }}_tests_torch_cuda_extensions_gpu
|
path: reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||||
|
|
||||||
run_tests_torch_cuda_extensions_multi_gpu:
|
run_tests_torch_cuda_extensions_multi_gpu:
|
||||||
name: Torch CUDA extension tests on multi GPUs
|
name: Torch CUDA extension tests
|
||||||
needs: setup
|
needs: setup
|
||||||
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
machines: [multi-gpu]
|
machine_type: [multi-gpu]
|
||||||
runs-on: [self-hosted, docker-gpu, '${{ matrix.machines }}']
|
runs-on: [self-hosted, docker-gpu, '${{ matrix.machine_type }}']
|
||||||
container:
|
container:
|
||||||
image: nvcr.io/nvidia/pytorch:21.03-py3
|
image: nvcr.io/nvidia/pytorch:21.03-py3
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@@ -277,19 +277,19 @@ jobs:
|
|||||||
- name: Run all non-slow selected tests on GPU
|
- name: Run all non-slow selected tests on GPU
|
||||||
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
||||||
run: |
|
run: |
|
||||||
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machines }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat reports/${{ matrix.machines }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
run: cat reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machines }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||||
path: reports/${{ matrix.machines }}_tests_torch_cuda_extensions_gpu
|
path: reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||||
|
|
||||||
send_results:
|
send_results:
|
||||||
name: Send results to webhook
|
name: Send results to webhook
|
||||||
|
|||||||
109
.github/workflows/self-scheduled.yml
vendored
109
.github/workflows/self-scheduled.yml
vendored
@@ -26,8 +26,8 @@ jobs:
|
|||||||
name: Setup
|
name: Setup
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
machines: [multi-gpu-docker, single-gpu-docker]
|
machine_type: [single-gpu, multi-gpu]
|
||||||
runs-on: ${{ matrix.machines }}
|
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }}
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-all-latest-gpu
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@@ -69,8 +69,8 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||||
machines: [single-gpu-docker]
|
machine_type: [single-gpu]
|
||||||
runs-on: ${{ matrix.machines }}
|
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }}
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-all-latest-gpu
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@@ -87,34 +87,25 @@ jobs:
|
|||||||
echo "$matrix_folders"
|
echo "$matrix_folders"
|
||||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||||
|
|
||||||
# Set machine type, i.e. `single-gpu` or `multi-gpu`. Here we just remove `-docker`.
|
|
||||||
- name: Set machine type from ${{ matrix.machines }}
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
machine_type=${{ matrix.machines }}
|
|
||||||
machine_type=${machine_type/'-docker'/''}
|
|
||||||
echo "machine_type=$machine_type"
|
|
||||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Update clone
|
- name: Update clone
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: python3 -m pytest -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||||
path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
|
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||||
|
|
||||||
run_tests_multi_gpu:
|
run_tests_multi_gpu:
|
||||||
name: Model tests
|
name: Model tests
|
||||||
@@ -122,8 +113,8 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||||
machines: [multi-gpu-docker]
|
machine_type: [multi-gpu]
|
||||||
runs-on: ${{ matrix.machines }}
|
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }}
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-all-latest-gpu
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
@@ -140,34 +131,25 @@ jobs:
|
|||||||
echo "$matrix_folders"
|
echo "$matrix_folders"
|
||||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||||
|
|
||||||
# Set machine type, i.e. `single-gpu` or `multi-gpu`. Here we just remove `-docker`.
|
|
||||||
- name: Set machine type from ${{ matrix.machines }}
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
machine_type=${{ matrix.machines }}
|
|
||||||
machine_type=${machine_type/'-docker'/''}
|
|
||||||
echo "machine_type=$machine_type"
|
|
||||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Update clone
|
- name: Update clone
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: python3 -m pytest -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||||
path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
|
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||||
|
|
||||||
run_examples_gpu:
|
run_examples_gpu:
|
||||||
name: Examples directory
|
name: Examples directory
|
||||||
@@ -204,22 +186,13 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
machines: [multi-gpu-docker, single-gpu-docker]
|
machine_type: [single-gpu, multi-gpu]
|
||||||
runs-on: ${{ matrix.machines }}
|
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }}
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-pytorch-gpu
|
image: huggingface/transformers-pytorch-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
needs: setup
|
needs: setup
|
||||||
steps:
|
steps:
|
||||||
# Set machine type, i.e. `single-gpu` or `multi-gpu`. Here we just remove `-docker`.
|
|
||||||
- name: Set machine type from ${{ matrix.machines }}
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
machine_type=${{ matrix.machines }}
|
|
||||||
machine_type=${machine_type/'-docker'/''}
|
|
||||||
echo "machine_type=$machine_type"
|
|
||||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Update clone
|
- name: Update clone
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
@@ -229,41 +202,32 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
RUN_PIPELINE_TESTS: yes
|
RUN_PIPELINE_TESTS: yes
|
||||||
run: |
|
run: |
|
||||||
python3 -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=${{ env.machine_type }}_tests_torch_pipeline_gpu tests
|
python3 -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=${{ matrix.machine_type }}_tests_torch_pipeline_gpu tests
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ env.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
|
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ env.machine_type }}_run_tests_torch_pipeline_gpu
|
name: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu
|
||||||
path: /transformers/reports/${{ env.machine_type }}_tests_torch_pipeline_gpu
|
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu
|
||||||
|
|
||||||
run_pipelines_tf_gpu:
|
run_pipelines_tf_gpu:
|
||||||
name: TensorFlow pipelines
|
name: TensorFlow pipelines
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
machines: [multi-gpu-docker, single-gpu-docker]
|
machine_type: [single-gpu, multi-gpu]
|
||||||
runs-on: ${{ matrix.machines }}
|
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }}
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-tensorflow-gpu
|
image: huggingface/transformers-tensorflow-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
needs: setup
|
needs: setup
|
||||||
steps:
|
steps:
|
||||||
# Set machine type, i.e. `single-gpu` or `multi-gpu`. Here we just remove `-docker`.
|
|
||||||
- name: Set machine type from ${{ matrix.machines }}
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
machine_type=${{ matrix.machines }}
|
|
||||||
machine_type=${machine_type/'-docker'/''}
|
|
||||||
echo "machine_type=$machine_type"
|
|
||||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Update clone
|
- name: Update clone
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
@@ -274,41 +238,32 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
RUN_PIPELINE_TESTS: yes
|
RUN_PIPELINE_TESTS: yes
|
||||||
run: |
|
run: |
|
||||||
python3 -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=${{ env.machine_type }}_tests_tf_pipeline_gpu tests
|
python3 -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=${{ matrix.machine_type }}_tests_tf_pipeline_gpu tests
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
run: |
|
run: |
|
||||||
cat /transformers/reports/${{ env.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
|
cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ env.machine_type }}_run_tests_tf_pipeline_gpu
|
name: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu
|
||||||
path: /transformers/reports/${{ env.machine_type }}_tests_tf_pipeline_gpu
|
path: /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu
|
||||||
|
|
||||||
run_all_tests_torch_cuda_extensions_gpu:
|
run_all_tests_torch_cuda_extensions_gpu:
|
||||||
name: Torch CUDA extension tests
|
name: Torch CUDA extension tests
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
machines: [multi-gpu-docker, single-gpu-docker]
|
machine_type: [single-gpu, multi-gpu]
|
||||||
runs-on: ${{ matrix.machines }}
|
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }}
|
||||||
needs: setup
|
needs: setup
|
||||||
container:
|
container:
|
||||||
image: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
image: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
# Set machine type, i.e. `single-gpu` or `multi-gpu`. Here we just remove `-docker`.
|
|
||||||
- name: Set machine type from ${{ matrix.machines }}
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
machine_type=${{ matrix.machines }}
|
|
||||||
machine_type=${machine_type/'-docker'/''}
|
|
||||||
echo "machine_type=$machine_type"
|
|
||||||
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Update clone
|
- name: Update clone
|
||||||
working-directory: /workspace/transformers
|
working-directory: /workspace/transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
@@ -324,19 +279,19 @@ jobs:
|
|||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
working-directory: /workspace/transformers
|
working-directory: /workspace/transformers
|
||||||
run: |
|
run: |
|
||||||
python -m pytest -v --make-reports=${{ env.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
python -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /workspace/transformers/reports/${{ env.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ env.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||||
path: /workspace/transformers/reports/${{ env.machine_type }}_tests_torch_cuda_extensions_gpu
|
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||||
|
|
||||||
|
|
||||||
send_results:
|
send_results:
|
||||||
|
|||||||
@@ -621,7 +621,8 @@ if __name__ == "__main__":
|
|||||||
if "stats" in artifact:
|
if "stats" in artifact:
|
||||||
# Link to the GitHub Action job
|
# Link to the GitHub Action job
|
||||||
model_results[model]["job_link"] = github_actions_job_links.get(
|
model_results[model]["job_link"] = github_actions_job_links.get(
|
||||||
f"Model tests ({model}, {artifact_path['gpu']}-gpu)"
|
# The job names use `matrix.folders`, which contains values like `models/bert` instead of `models_bert`
|
||||||
|
f"Model tests ({model.replace('models_', 'models/')}, {artifact_path['gpu']}-gpu)"
|
||||||
)
|
)
|
||||||
|
|
||||||
failed, success, time_spent = handle_test_results(artifact["stats"])
|
failed, success, time_spent = handle_test_results(artifact["stats"])
|
||||||
|
|||||||
Reference in New Issue
Block a user