Update self-push workflow (#17177)
* update push ci
* install git-python
* update comment
* update deepspeed jobs
* fix report
* skip 2 more tests that require fairscale
* Fix changes in tests_fetcher.py (to handle the case where `setup.py` is changed)
* set RUN_PT_TF_CROSS_TESTS=1 and final clean-up
* remove SIGOPT_API_TOKEN
* remove echo "$matrix_folders"

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
508
.github/workflows/self-push.yml
vendored
508
.github/workflows/self-push.yml
vendored
@@ -20,358 +20,192 @@ env:
|
|||||||
OMP_NUM_THREADS: 8
|
OMP_NUM_THREADS: 8
|
||||||
MKL_NUM_THREADS: 8
|
MKL_NUM_THREADS: 8
|
||||||
PYTEST_TIMEOUT: 60
|
PYTEST_TIMEOUT: 60
|
||||||
|
TF_FORCE_GPU_ALLOW_GROWTH: true
|
||||||
|
RUN_PT_TF_CROSS_TESTS: 1
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_tests_torch_gpu:
|
setup:
|
||||||
runs-on: [self-hosted, docker-gpu, single-gpu]
|
name: Setup
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||||
|
test_map: ${{ steps.set-matrix.outputs.test_map }}
|
||||||
|
steps:
|
||||||
|
- name: Checkout transformers
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 2
|
||||||
|
|
||||||
|
- name: Cleanup
|
||||||
|
run: |
|
||||||
|
rm -rf tests/__pycache__
|
||||||
|
rm -rf tests/models/__pycache__
|
||||||
|
rm -rf reports
|
||||||
|
|
||||||
|
- name: Fetch the tests to run
|
||||||
|
# TODO: add `git-python` in the docker images
|
||||||
|
run: |
|
||||||
|
pip install --upgrade git-python
|
||||||
|
python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
||||||
|
|
||||||
|
- name: Report fetched tests
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
with:
|
||||||
|
name: test_fetched
|
||||||
|
path: test_preparation.txt
|
||||||
|
|
||||||
|
- id: set-matrix
|
||||||
|
name: Organize tests into models
|
||||||
|
# `keys` is used as the GitHub Actions matrix for the jobs, e.g. `models/bert`, `tokenization`, `pipeline`, etc.
|
||||||
|
# The `test_map` is used to get the actual identified test files under each key.
|
||||||
|
# If there is no test to run (so no `test_map.json` file exists), create a dummy map (an empty matrix would fail).
|
||||||
|
run: |
|
||||||
|
if [ -f test_map.json ]; then
|
||||||
|
keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
|
||||||
|
test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
|
||||||
|
else
|
||||||
|
keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
|
||||||
|
test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
|
||||||
|
fi
|
||||||
|
echo $keys
|
||||||
|
echo $test_map
|
||||||
|
echo "::set-output name=matrix::$keys"
|
||||||
|
echo "::set-output name=test_map::$test_map"
|
||||||
|
|
||||||
|
run_tests_single_gpu:
|
||||||
|
name: Model Tests on single GPU
|
||||||
|
needs: setup
|
||||||
|
# `dummy` means there is no test to run
|
||||||
|
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||||
|
machines: [single-gpu]
|
||||||
|
runs-on: [self-hosted, docker-gpu, '${{ matrix.machines }}']
|
||||||
container:
|
container:
|
||||||
image: pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
|
|
||||||
apt install -y libsndfile1-dev espeak-ng
|
|
||||||
pip install --upgrade pip
|
|
||||||
pip install .[sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]
|
|
||||||
pip install https://github.com/kpu/kenlm/archive/master.zip
|
|
||||||
|
|
||||||
- name: Launcher docker
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
fetch-depth: 2
|
|
||||||
|
|
||||||
- name: NVIDIA-SMI
|
- name: NVIDIA-SMI
|
||||||
run: |
|
run: |
|
||||||
nvidia-smi
|
nvidia-smi
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
utils/print_env_pt.py
|
utils/print_env_pt.py
|
||||||
|
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
||||||
|
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
||||||
|
|
||||||
- name: Fetch the tests to run
|
- name: Echo folder ${{ matrix.folders }}
|
||||||
|
shell: bash
|
||||||
|
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
||||||
|
# set the artifact folder names (because the character `/` is not allowed).
|
||||||
run: |
|
run: |
|
||||||
python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
echo "${{ matrix.folders }}"
|
||||||
|
echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
|
||||||
|
matrix_folders=${{ matrix.folders }}
|
||||||
|
matrix_folders=${matrix_folders/'models/'/'models_'}
|
||||||
|
echo "$matrix_folders"
|
||||||
|
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Report fetched tests
|
- name: Update clone
|
||||||
uses: actions/upload-artifact@v2
|
working-directory: /transformers
|
||||||
with:
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
name: test_fetched
|
|
||||||
path: test_preparation.txt
|
|
||||||
|
|
||||||
- name: Run all non-slow tests on GPU
|
- name: Run all non-slow selected tests on GPU
|
||||||
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
if [ -f test_list.txt ]; then
|
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machines }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
|
||||||
python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_torch_gpu $(cat test_list.txt)
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
run: cat reports/tests_torch_gpu/failures_short.txt
|
continue-on-error: true
|
||||||
|
run: cat /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: run_all_tests_torch_gpu_test_reports
|
name: ${{ matrix.machines }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||||
path: reports
|
path: /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}
|
||||||
|
|
||||||
# run_tests_flax_gpu:
|
run_tests_multi_gpu:
|
||||||
# runs-on: [self-hosted, docker-gpu-test, single-gpu]
|
name: Model Tests on multi GPUs
|
||||||
# container:
|
needs: setup
|
||||||
# image: tensorflow/tensorflow:2.4.1-gpu
|
# `dummy` means there is no test to run
|
||||||
# options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
||||||
# steps:
|
strategy:
|
||||||
# - name: Set up Python 3.7
|
fail-fast: false
|
||||||
# uses: actions/setup-python@v2
|
matrix:
|
||||||
# with:
|
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||||
# python-version: 3.7
|
machines: [multi-gpu]
|
||||||
#
|
runs-on: [self-hosted, docker-gpu, '${{ matrix.machines }}']
|
||||||
# - name: Install dependencies
|
|
||||||
# run: |
|
|
||||||
# apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git espeak-ng
|
|
||||||
# pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
|
|
||||||
# pip install --upgrade pip
|
|
||||||
# pip install .[sklearn,testing,sentencepiece,flax,flax-speech,vision]
|
|
||||||
#
|
|
||||||
# - name: Launcher docker
|
|
||||||
# uses: actions/checkout@v2
|
|
||||||
# with:
|
|
||||||
# fetch-depth: 2
|
|
||||||
#
|
|
||||||
# - name: NVIDIA-SMI
|
|
||||||
# continue-on-error: true
|
|
||||||
# run: |
|
|
||||||
# nvidia-smi
|
|
||||||
#
|
|
||||||
# - name: Are GPUs recognized by our DL frameworks
|
|
||||||
# run: |
|
|
||||||
# python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
|
|
||||||
# python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
|
|
||||||
#
|
|
||||||
# - name: Fetch the tests to run
|
|
||||||
# run: |
|
|
||||||
# python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
|
||||||
#
|
|
||||||
# - name: Report fetched tests
|
|
||||||
# uses: actions/upload-artifact@v2
|
|
||||||
# with:
|
|
||||||
# name: test_fetched
|
|
||||||
# path: test_preparation.txt
|
|
||||||
#
|
|
||||||
# - name: Run all non-slow tests on GPU
|
|
||||||
# run: |
|
|
||||||
# if [ -f test_list.txt ]; then
|
|
||||||
# python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_gpu $(cat test_list.txt)
|
|
||||||
# fi
|
|
||||||
#
|
|
||||||
# - name: Failure short reports
|
|
||||||
# if: ${{ failure() }}
|
|
||||||
# run: cat reports/tests_flax_gpu/failures_short.txt
|
|
||||||
#
|
|
||||||
# - name: Test suite reports artifacts
|
|
||||||
# if: ${{ always() }}
|
|
||||||
# uses: actions/upload-artifact@v2
|
|
||||||
# with:
|
|
||||||
# name: run_all_tests_flax_gpu_test_reports
|
|
||||||
# path: reports
|
|
||||||
#
|
|
||||||
# run_tests_tf_gpu:
|
|
||||||
# runs-on: [self-hosted, docker-gpu, single-gpu]
|
|
||||||
# timeout-minutes: 120
|
|
||||||
# container:
|
|
||||||
# image: tensorflow/tensorflow:2.4.1-gpu
|
|
||||||
# options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
|
||||||
# steps:
|
|
||||||
# - name: Install dependencies
|
|
||||||
# run: |
|
|
||||||
# apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git espeak-ng
|
|
||||||
# pip install --upgrade pip
|
|
||||||
# pip install .[sklearn,testing,onnxruntime,sentencepiece,tf-speech]
|
|
||||||
# pip install https://github.com/kpu/kenlm/archive/master.zip
|
|
||||||
#
|
|
||||||
# - name: Launcher docker
|
|
||||||
# uses: actions/checkout@v2
|
|
||||||
# with:
|
|
||||||
# fetch-depth: 2
|
|
||||||
#
|
|
||||||
# - name: NVIDIA-SMI
|
|
||||||
# run: |
|
|
||||||
# nvidia-smi
|
|
||||||
#
|
|
||||||
# - name: Are GPUs recognized by our DL frameworks
|
|
||||||
# run: |
|
|
||||||
# TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
|
||||||
# TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
|
||||||
#
|
|
||||||
# - name: Fetch the tests to run
|
|
||||||
# run: |
|
|
||||||
# python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
|
||||||
#
|
|
||||||
# - name: Report fetched tests
|
|
||||||
# uses: actions/upload-artifact@v2
|
|
||||||
# with:
|
|
||||||
# name: test_fetched
|
|
||||||
# path: test_preparation.txt
|
|
||||||
#
|
|
||||||
# - name: Run all non-slow tests on GPU
|
|
||||||
# env:
|
|
||||||
# TF_NUM_INTRAOP_THREADS: 8
|
|
||||||
# TF_NUM_INTEROP_THREADS: 1
|
|
||||||
# run: |
|
|
||||||
# if [ -f test_list.txt ]; then
|
|
||||||
# python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu $(cat test_list.txt)
|
|
||||||
# fi
|
|
||||||
#
|
|
||||||
# - name: Failure short reports
|
|
||||||
# if: ${{ failure() }}
|
|
||||||
# run: cat reports/tests_tf_gpu/failures_short.txt
|
|
||||||
#
|
|
||||||
# - name: Test suite reports artifacts
|
|
||||||
# if: ${{ always() }}
|
|
||||||
# uses: actions/upload-artifact@v2
|
|
||||||
# with:
|
|
||||||
# name: run_all_tests_tf_gpu_test_reports
|
|
||||||
# path: reports
|
|
||||||
|
|
||||||
|
|
||||||
run_tests_torch_multi_gpu:
|
|
||||||
runs-on: [self-hosted, docker-gpu, multi-gpu]
|
|
||||||
container:
|
container:
|
||||||
image: pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime
|
image: huggingface/transformers-all-latest-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git espeak-ng
|
|
||||||
apt install -y libsndfile1-dev espeak-ng
|
|
||||||
pip install --upgrade pip
|
|
||||||
pip install .[sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]
|
|
||||||
pip install https://github.com/kpu/kenlm/archive/master.zip
|
|
||||||
- name: Launcher docker
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
fetch-depth: 2
|
|
||||||
|
|
||||||
- name: NVIDIA-SMI
|
- name: NVIDIA-SMI
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
nvidia-smi
|
nvidia-smi
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
utils/print_env_pt.py
|
utils/print_env_pt.py
|
||||||
|
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
||||||
|
TF_CPP_MIN_LOG_LEVEL=3 python3 -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
||||||
|
|
||||||
- name: Fetch the tests to run
|
- name: Echo folder ${{ matrix.folders }}
|
||||||
|
shell: bash
|
||||||
|
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
|
||||||
|
# set the artifact folder names (because the character `/` is not allowed).
|
||||||
run: |
|
run: |
|
||||||
python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
echo "${{ matrix.folders }}"
|
||||||
|
echo "${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}"
|
||||||
|
matrix_folders=${{ matrix.folders }}
|
||||||
|
matrix_folders=${matrix_folders/'models/'/'models_'}
|
||||||
|
echo "$matrix_folders"
|
||||||
|
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Report fetched tests
|
- name: Update clone
|
||||||
uses: actions/upload-artifact@v2
|
working-directory: /transformers
|
||||||
with:
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
name: test_fetched
|
|
||||||
path: test_preparation.txt
|
|
||||||
|
|
||||||
- name: Run all non-slow tests on GPU
|
- name: Run all non-slow selected tests on GPU
|
||||||
env:
|
env:
|
||||||
MKL_SERVICE_FORCE_INTEL: 1
|
MKL_SERVICE_FORCE_INTEL: 1
|
||||||
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
if [ -f test_list.txt ]; then
|
python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machines }}_tests_gpu_${{ matrix.folders }} ${{ fromJson(needs.setup.outputs.test_map)[matrix.folders] }}
|
||||||
python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_torch_multi_gpu $(cat test_list.txt)
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
run: cat reports/tests_torch_multi_gpu/failures_short.txt
|
continue-on-error: true
|
||||||
|
run: cat /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: run_all_tests_torch_multi_gpu_test_reports
|
name: ${{ matrix.machines }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||||
path: reports
|
path: /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}
|
||||||
|
|
||||||
# run_tests_flax_multi_gpu:
|
run_tests_torch_cuda_extensions_single_gpu:
|
||||||
# runs-on: [self-hosted, docker-gpu, multi-gpu]
|
name: Torch CUDA extension tests on single GPU
|
||||||
# container:
|
needs: setup
|
||||||
# image: tensorflow/tensorflow:2.4.1-gpu
|
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
||||||
# options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
strategy:
|
||||||
# steps:
|
fail-fast: false
|
||||||
# - name: Install dependencies
|
matrix:
|
||||||
# run: |
|
machines: [single-gpu]
|
||||||
# apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git espeak-ng
|
runs-on: [self-hosted, docker-gpu, '${{ matrix.machines }}']
|
||||||
# pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
|
|
||||||
# pip install --upgrade pip
|
|
||||||
# pip install .[sklearn,testing,sentencepiece,flax,flax-speech,vision]
|
|
||||||
# pip install https://github.com/kpu/kenlm/archive/master.zip
|
|
||||||
#
|
|
||||||
# - name: Launcher docker
|
|
||||||
# uses: actions/checkout@v2
|
|
||||||
# with:
|
|
||||||
# fetch-depth: 2
|
|
||||||
#
|
|
||||||
# - name: NVIDIA-SMI
|
|
||||||
# continue-on-error: true
|
|
||||||
# run: |
|
|
||||||
# nvidia-smi
|
|
||||||
#
|
|
||||||
# - name: Are GPUs recognized by our DL frameworks
|
|
||||||
# run: |
|
|
||||||
# python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
|
|
||||||
# python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
|
|
||||||
#
|
|
||||||
# - name: Fetch the tests to run
|
|
||||||
# run: |
|
|
||||||
# python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
|
||||||
#
|
|
||||||
# - name: Report fetched tests
|
|
||||||
# uses: actions/upload-artifact@v2
|
|
||||||
# with:
|
|
||||||
# name: test_fetched
|
|
||||||
# path: test_preparation.txt
|
|
||||||
#
|
|
||||||
# - name: Run all non-slow tests on GPU
|
|
||||||
# run: |
|
|
||||||
# if [ -f test_list.txt ]; then
|
|
||||||
# python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_multi_gpu $(cat test_list.txt)
|
|
||||||
# fi
|
|
||||||
#
|
|
||||||
# - name: Failure short reports
|
|
||||||
# if: ${{ failure() }}
|
|
||||||
# run: cat reports/tests_flax_multi_gpu/failures_short.txt
|
|
||||||
#
|
|
||||||
# - name: Test suite reports artifacts
|
|
||||||
# if: ${{ always() }}
|
|
||||||
# uses: actions/upload-artifact@v2
|
|
||||||
# with:
|
|
||||||
# name: run_all_tests_flax_multi_gpu_test_reports
|
|
||||||
# path: reports
|
|
||||||
|
|
||||||
# run_tests_tf_multi_gpu:
|
|
||||||
# runs-on: [self-hosted, docker-gpu, multi-gpu]
|
|
||||||
# timeout-minutes: 120
|
|
||||||
# container:
|
|
||||||
# image: tensorflow/tensorflow:2.4.1-gpu
|
|
||||||
# options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
|
||||||
# steps:
|
|
||||||
# - name: Install dependencies
|
|
||||||
# run: |
|
|
||||||
# apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git espeak-ng
|
|
||||||
# pip install --upgrade pip
|
|
||||||
# pip install .[sklearn,testing,onnxruntime,sentencepiece,tf-speech]
|
|
||||||
# pip install https://github.com/kpu/kenlm/archive/master.zip
|
|
||||||
#
|
|
||||||
# - name: Launcher docker
|
|
||||||
# uses: actions/checkout@v2
|
|
||||||
# with:
|
|
||||||
# fetch-depth: 2
|
|
||||||
#
|
|
||||||
# - name: NVIDIA-SMI
|
|
||||||
# run: |
|
|
||||||
# nvidia-smi
|
|
||||||
#
|
|
||||||
# - name: Are GPUs recognized by our DL frameworks
|
|
||||||
# run: |
|
|
||||||
# TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
|
||||||
# TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
|
||||||
#
|
|
||||||
# - name: Fetch the tests to run
|
|
||||||
# run: |
|
|
||||||
# python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
|
||||||
#
|
|
||||||
# - name: Report fetched tests
|
|
||||||
# uses: actions/upload-artifact@v2
|
|
||||||
# with:
|
|
||||||
# name: test_fetched
|
|
||||||
# path: test_preparation.txt
|
|
||||||
#
|
|
||||||
# - name: Run all non-slow tests on GPU
|
|
||||||
# env:
|
|
||||||
# TF_NUM_INTRAOP_THREADS: 8
|
|
||||||
# TF_NUM_INTEROP_THREADS: 1
|
|
||||||
# run: |
|
|
||||||
# if [ -f test_list.txt ]; then
|
|
||||||
# python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu $(cat test_list.txt)
|
|
||||||
# fi
|
|
||||||
#
|
|
||||||
# - name: Failure short reports
|
|
||||||
# if: ${{ failure() }}
|
|
||||||
# run: cat reports/tests_tf_multi_gpu/failures_short.txt
|
|
||||||
#
|
|
||||||
# - name: Test suite reports artifacts
|
|
||||||
# if: ${{ always() }}
|
|
||||||
# uses: actions/upload-artifact@v2
|
|
||||||
# with:
|
|
||||||
# name: run_all_tests_tf_multi_gpu_test_reports
|
|
||||||
# path: reports
|
|
||||||
|
|
||||||
run_tests_torch_cuda_extensions_gpu:
|
|
||||||
runs-on: [self-hosted, docker-gpu, single-gpu]
|
|
||||||
container:
|
container:
|
||||||
image: nvcr.io/nvidia/pytorch:21.03-py3
|
image: nvcr.io/nvidia/pytorch:21.03-py3
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- name: Launcher docker
|
- name: Checkout transformers
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@v2
|
||||||
with:
|
with:
|
||||||
fetch-depth: 2
|
fetch-depth: 2
|
||||||
@@ -390,46 +224,42 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
utils/print_env_pt.py
|
utils/print_env_pt.py
|
||||||
|
|
||||||
- name: Fetch the tests to run
|
- name: Run all non-slow selected tests on GPU
|
||||||
|
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
||||||
run: |
|
run: |
|
||||||
python utils/tests_fetcher.py --diff_with_last_commit --filters tests/deepspeed tests/extended | tee test_preparation.txt
|
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machines }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
||||||
|
|
||||||
- name: Report fetched tests
|
|
||||||
uses: actions/upload-artifact@v2
|
|
||||||
with:
|
|
||||||
name: test_fetched
|
|
||||||
path: test_preparation.txt
|
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
|
||||||
run: |
|
|
||||||
if [ -f test_list.txt ]; then
|
|
||||||
python -m pytest -n 1 --dist=loadfile -v --make-reports=tests_torch_cuda_extensions_gpu $(cat test_list.txt)
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
run: cat reports/tests_torch_cuda_extensions_gpu/failures_short.txt
|
continue-on-error: true
|
||||||
|
run: cat reports/${{ matrix.machines }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: run_tests_torch_cuda_extensions_gpu_test_reports
|
name: ${{ matrix.machines }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||||
path: reports
|
path: reports/${{ matrix.machines }}_tests_torch_cuda_extensions_gpu
|
||||||
|
|
||||||
run_tests_torch_cuda_extensions_multi_gpu:
|
run_tests_torch_cuda_extensions_multi_gpu:
|
||||||
runs-on: [self-hosted, docker-gpu, multi-gpu]
|
name: Torch CUDA extension tests on multi GPUs
|
||||||
|
needs: setup
|
||||||
|
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
machines: [multi-gpu]
|
||||||
|
runs-on: [self-hosted, docker-gpu, '${{ matrix.machines }}']
|
||||||
container:
|
container:
|
||||||
image: nvcr.io/nvidia/pytorch:21.03-py3
|
image: nvcr.io/nvidia/pytorch:21.03-py3
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- name: Launcher docker
|
- name: Checkout transformers
|
||||||
uses: actions/checkout@v2
|
uses: actions/checkout@v2
|
||||||
with:
|
with:
|
||||||
fetch-depth: 2
|
fetch-depth: 2
|
||||||
|
|
||||||
- name: NVIDIA-SMI
|
- name: NVIDIA-SMI
|
||||||
continue-on-error: true
|
|
||||||
run: |
|
run: |
|
||||||
nvidia-smi
|
nvidia-smi
|
||||||
|
|
||||||
@@ -444,56 +274,46 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
utils/print_env_pt.py
|
utils/print_env_pt.py
|
||||||
|
|
||||||
- name: Fetch the tests to run
|
- name: Run all non-slow selected tests on GPU
|
||||||
|
# TODO: Here we pass all tests in the 2 folders for simplicity. It's better to pass only the identified tests.
|
||||||
run: |
|
run: |
|
||||||
python utils/tests_fetcher.py --diff_with_last_commit --filters tests/deepspeed tests/extended | tee test_preparation.txt
|
python -m pytest -n 1 --dist=loadfile -v --make-reports=${{ matrix.machines }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
||||||
|
|
||||||
- name: Report fetched tests
|
|
||||||
uses: actions/upload-artifact@v2
|
|
||||||
with:
|
|
||||||
name: test_fetched
|
|
||||||
path: test_preparation.txt
|
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
|
||||||
run: |
|
|
||||||
if [ -f test_list.txt ]; then
|
|
||||||
python -m pytest -n 1 --dist=loadfile -v --make-reports=tests_torch_cuda_extensions_multi_gpu $(cat test_list.txt)
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
run: cat reports/tests_torch_cuda_extensions_multi_gpu/failures_short.txt
|
continue-on-error: true
|
||||||
|
run: cat reports/${{ matrix.machines }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: run_tests_torch_cuda_extensions_multi_gpu_test_reports
|
name: ${{ matrix.machines }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||||
path: reports
|
path: reports/${{ matrix.machines }}_tests_torch_cuda_extensions_gpu
|
||||||
|
|
||||||
|
|
||||||
send_results:
|
send_results:
|
||||||
name: Send results to webhook
|
name: Send results to webhook
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
if: always()
|
if: always()
|
||||||
needs: [
|
needs: [
|
||||||
run_tests_torch_gpu,
|
setup,
|
||||||
# run_tests_tf_gpu,
|
run_tests_single_gpu,
|
||||||
run_tests_torch_multi_gpu,
|
run_tests_multi_gpu,
|
||||||
# run_tests_tf_multi_gpu,
|
run_tests_torch_cuda_extensions_single_gpu,
|
||||||
run_tests_torch_cuda_extensions_gpu,
|
|
||||||
run_tests_torch_cuda_extensions_multi_gpu
|
run_tests_torch_cuda_extensions_multi_gpu
|
||||||
]
|
]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
- uses: actions/download-artifact@v2
|
- uses: actions/download-artifact@v2
|
||||||
|
|
||||||
- name: Send message to Slack
|
- name: Send message to Slack
|
||||||
env:
|
env:
|
||||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||||
|
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||||
|
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||||
|
CI_EVENT: push
|
||||||
|
# We pass `needs.setup.outputs.matrix` as the argument. `notification_service.py` must convert
|
||||||
|
# `models/bert` to `models_bert`, because the artifact names use `_` instead of `/`.
|
||||||
run: |
|
run: |
|
||||||
pip install slack_sdk
|
pip install slack_sdk
|
||||||
python utils/notification_service_deprecated.py push
|
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
|
||||||
|
|||||||
88
.github/workflows/self-scheduled.yml
vendored
88
.github/workflows/self-scheduled.yml
vendored
@@ -83,30 +83,38 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
echo "${{ matrix.folders }}"
|
echo "${{ matrix.folders }}"
|
||||||
matrix_folders=${{ matrix.folders }}
|
matrix_folders=${{ matrix.folders }}
|
||||||
echo "$matrix_folders"
|
|
||||||
matrix_folders=${matrix_folders/'models/'/'models_'}
|
matrix_folders=${matrix_folders/'models/'/'models_'}
|
||||||
echo "$matrix_folders"
|
echo "$matrix_folders"
|
||||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
# Set machine type, i.e. `single-gpu` or `multi-gpu`. Here we just remove `-docker`.
|
||||||
|
- name: Set machine type from ${{ matrix.machines }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
machine_type=${{ matrix.machines }}
|
||||||
|
machine_type=${machine_type/'-docker'/''}
|
||||||
|
echo "machine_type=$machine_type"
|
||||||
|
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Update clone
|
- name: Update clone
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: python3 -m pytest -v --make-reports=${{ matrix.machines }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
run: python3 -m pytest -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machines }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||||
path: /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}
|
path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||||
|
|
||||||
run_tests_multi_gpu:
|
run_tests_multi_gpu:
|
||||||
name: Model tests
|
name: Model tests
|
||||||
@@ -128,30 +136,38 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
echo "${{ matrix.folders }}"
|
echo "${{ matrix.folders }}"
|
||||||
matrix_folders=${{ matrix.folders }}
|
matrix_folders=${{ matrix.folders }}
|
||||||
echo "$matrix_folders"
|
|
||||||
matrix_folders=${matrix_folders/'models/'/'models_'}
|
matrix_folders=${matrix_folders/'models/'/'models_'}
|
||||||
echo "$matrix_folders"
|
echo "$matrix_folders"
|
||||||
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
# Set machine type, i.e. `single-gpu` or `multi-gpu`. Here we just remove `-docker`.
|
||||||
|
- name: Set machine type from ${{ matrix.machines }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
machine_type=${{ matrix.machines }}
|
||||||
|
machine_type=${machine_type/'-docker'/''}
|
||||||
|
echo "machine_type=$machine_type"
|
||||||
|
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Update clone
|
- name: Update clone
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: python3 -m pytest -v --make-reports=${{ matrix.machines }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
run: python3 -m pytest -v --make-reports=${{ env.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
run: cat /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machines }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
name: ${{ env.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||||
path: /transformers/reports/${{ matrix.machines }}_tests_gpu_${{ matrix.folders }}
|
path: /transformers/reports/${{ env.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||||
|
|
||||||
run_examples_gpu:
|
run_examples_gpu:
|
||||||
name: Examples directory
|
name: Examples directory
|
||||||
@@ -195,6 +211,15 @@ jobs:
|
|||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
needs: setup
|
needs: setup
|
||||||
steps:
|
steps:
|
||||||
|
# Set machine type, i.e. `single-gpu` or `multi-gpu`. Here we just remove `-docker`.
|
||||||
|
- name: Set machine type from ${{ matrix.machines }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
machine_type=${{ matrix.machines }}
|
||||||
|
machine_type=${machine_type/'-docker'/''}
|
||||||
|
echo "machine_type=$machine_type"
|
||||||
|
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Update clone
|
- name: Update clone
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
@@ -204,19 +229,19 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
RUN_PIPELINE_TESTS: yes
|
RUN_PIPELINE_TESTS: yes
|
||||||
run: |
|
run: |
|
||||||
python3 -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=${{ matrix.machines }}_tests_torch_pipeline_gpu tests
|
python3 -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=${{ env.machine_type }}_tests_torch_pipeline_gpu tests
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /transformers/reports/${{ matrix.machines }}_tests_torch_pipeline_gpu/failures_short.txt
|
run: cat /transformers/reports/${{ env.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machines }}_run_tests_torch_pipeline_gpu
|
name: ${{ env.machine_type }}_run_tests_torch_pipeline_gpu
|
||||||
path: /transformers/reports/${{ matrix.machines }}_tests_torch_pipeline_gpu
|
path: /transformers/reports/${{ env.machine_type }}_tests_torch_pipeline_gpu
|
||||||
|
|
||||||
run_pipelines_tf_gpu:
|
run_pipelines_tf_gpu:
|
||||||
name: TensorFlow pipelines
|
name: TensorFlow pipelines
|
||||||
@@ -230,6 +255,15 @@ jobs:
|
|||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
needs: setup
|
needs: setup
|
||||||
steps:
|
steps:
|
||||||
|
# Set machine type, i.e. `single-gpu` or `multi-gpu`. Here we just remove `-docker`.
|
||||||
|
- name: Set machine type from ${{ matrix.machines }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
machine_type=${{ matrix.machines }}
|
||||||
|
machine_type=${machine_type/'-docker'/''}
|
||||||
|
echo "machine_type=$machine_type"
|
||||||
|
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Update clone
|
- name: Update clone
|
||||||
working-directory: /transformers
|
working-directory: /transformers
|
||||||
run: |
|
run: |
|
||||||
@@ -240,19 +274,19 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
RUN_PIPELINE_TESTS: yes
|
RUN_PIPELINE_TESTS: yes
|
||||||
run: |
|
run: |
|
||||||
python3 -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=${{ matrix.machines }}_tests_tf_pipeline_gpu tests
|
python3 -m pytest -n 1 -v --dist=loadfile -m is_pipeline_test --make-reports=${{ env.machine_type }}_tests_tf_pipeline_gpu tests
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
run: |
|
run: |
|
||||||
cat /transformers/reports/${{ matrix.machines }}_tests_tf_pipeline_gpu/failures_short.txt
|
cat /transformers/reports/${{ env.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machines }}_run_tests_tf_pipeline_gpu
|
name: ${{ env.machine_type }}_run_tests_tf_pipeline_gpu
|
||||||
path: /transformers/reports/${{ matrix.machines }}_tests_tf_pipeline_gpu
|
path: /transformers/reports/${{ env.machine_type }}_tests_tf_pipeline_gpu
|
||||||
|
|
||||||
run_all_tests_torch_cuda_extensions_gpu:
|
run_all_tests_torch_cuda_extensions_gpu:
|
||||||
name: Torch CUDA extension tests
|
name: Torch CUDA extension tests
|
||||||
@@ -266,6 +300,15 @@ jobs:
|
|||||||
image: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
image: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
|
# Set machine type, i.e. `single-gpu` or `multi-gpu`. Here we just remove `-docker`.
|
||||||
|
- name: Set machine type from ${{ matrix.machines }}
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
machine_type=${{ matrix.machines }}
|
||||||
|
machine_type=${machine_type/'-docker'/''}
|
||||||
|
echo "machine_type=$machine_type"
|
||||||
|
echo "machine_type=$machine_type" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Update clone
|
- name: Update clone
|
||||||
working-directory: /workspace/transformers
|
working-directory: /workspace/transformers
|
||||||
run: git fetch && git checkout ${{ github.sha }}
|
run: git fetch && git checkout ${{ github.sha }}
|
||||||
@@ -281,19 +324,19 @@ jobs:
|
|||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
working-directory: /workspace/transformers
|
working-directory: /workspace/transformers
|
||||||
run: |
|
run: |
|
||||||
python -m pytest -v --make-reports=${{ matrix.machines }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
python -m pytest -v --make-reports=${{ env.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ failure() }}
|
if: ${{ failure() }}
|
||||||
continue-on-error: true
|
continue-on-error: true
|
||||||
run: cat /workspace/transformers/reports/${{ matrix.machines }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
run: cat /workspace/transformers/reports/${{ env.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.machines }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
name: ${{ env.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||||
path: /workspace/transformers/reports/${{ matrix.machines }}_tests_torch_cuda_extensions_gpu
|
path: /workspace/transformers/reports/${{ env.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||||
|
|
||||||
|
|
||||||
send_results:
|
send_results:
|
||||||
@@ -310,6 +353,7 @@ jobs:
|
|||||||
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||||
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
|
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
|
||||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||||
|
CI_EVENT: scheduled
|
||||||
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
|
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
|
||||||
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
|
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -105,6 +105,7 @@ class TestTrainerExt(TestCasePlus):
|
|||||||
self.run_seq2seq_quick(distributed=True)
|
self.run_seq2seq_quick(distributed=True)
|
||||||
|
|
||||||
# test --sharded_ddp w/o --fp16
|
# test --sharded_ddp w/o --fp16
|
||||||
|
@unittest.skip("Requires an update of the env running those tests")
|
||||||
@require_torch_multi_gpu
|
@require_torch_multi_gpu
|
||||||
@require_fairscale
|
@require_fairscale
|
||||||
def test_run_seq2seq_sharded_ddp(self):
|
def test_run_seq2seq_sharded_ddp(self):
|
||||||
@@ -118,6 +119,7 @@ class TestTrainerExt(TestCasePlus):
|
|||||||
self.run_seq2seq_quick(distributed=True, extra_args_str="--sharded_ddp simple --fp16")
|
self.run_seq2seq_quick(distributed=True, extra_args_str="--sharded_ddp simple --fp16")
|
||||||
|
|
||||||
# test --sharded_ddp zero_dp_2 w/o --fp16
|
# test --sharded_ddp zero_dp_2 w/o --fp16
|
||||||
|
@unittest.skip("Requires an update of the env running those tests")
|
||||||
@require_torch_multi_gpu
|
@require_torch_multi_gpu
|
||||||
@require_fairscale
|
@require_fairscale
|
||||||
def test_run_seq2seq_fully_sharded_ddp(self):
|
def test_run_seq2seq_fully_sharded_ddp(self):
|
||||||
|
|||||||
@@ -497,7 +497,7 @@ def retrieve_artifact(name: str, gpu: Optional[str]):
|
|||||||
raise ValueError(f"Invalid GPU for artifact. Passed GPU: `{gpu}`.")
|
raise ValueError(f"Invalid GPU for artifact. Passed GPU: `{gpu}`.")
|
||||||
|
|
||||||
if gpu is not None:
|
if gpu is not None:
|
||||||
name = f"{gpu}-gpu-docker_{name}"
|
name = f"{gpu}-gpu_{name}"
|
||||||
|
|
||||||
_artifact = {}
|
_artifact = {}
|
||||||
|
|
||||||
@@ -531,8 +531,8 @@ def retrieve_available_artifacts():
|
|||||||
|
|
||||||
directories = filter(os.path.isdir, os.listdir())
|
directories = filter(os.path.isdir, os.listdir())
|
||||||
for directory in directories:
|
for directory in directories:
|
||||||
if directory.startswith("single-gpu-docker"):
|
if directory.startswith("single-gpu"):
|
||||||
artifact_name = directory[len("single-gpu-docker") + 1 :]
|
artifact_name = directory[len("single-gpu") + 1 :]
|
||||||
|
|
||||||
if artifact_name in _available_artifacts:
|
if artifact_name in _available_artifacts:
|
||||||
_available_artifacts[artifact_name].single_gpu = True
|
_available_artifacts[artifact_name].single_gpu = True
|
||||||
@@ -541,8 +541,8 @@ def retrieve_available_artifacts():
|
|||||||
|
|
||||||
_available_artifacts[artifact_name].add_path(directory, gpu="single")
|
_available_artifacts[artifact_name].add_path(directory, gpu="single")
|
||||||
|
|
||||||
elif directory.startswith("multi-gpu-docker"):
|
elif directory.startswith("multi-gpu"):
|
||||||
artifact_name = directory[len("multi-gpu-docker") + 1 :]
|
artifact_name = directory[len("multi-gpu") + 1 :]
|
||||||
|
|
||||||
if artifact_name in _available_artifacts:
|
if artifact_name in _available_artifacts:
|
||||||
_available_artifacts[artifact_name].multi_gpu = True
|
_available_artifacts[artifact_name].multi_gpu = True
|
||||||
@@ -561,6 +561,10 @@ def retrieve_available_artifacts():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
# This env. variable is set in workflow file (under the job `send_results`).
|
||||||
|
ci_event = os.environ["CI_EVENT"]
|
||||||
|
|
||||||
arguments = sys.argv[1:][0]
|
arguments = sys.argv[1:][0]
|
||||||
try:
|
try:
|
||||||
models = ast.literal_eval(arguments)
|
models = ast.literal_eval(arguments)
|
||||||
@@ -609,7 +613,7 @@ if __name__ == "__main__":
|
|||||||
if "stats" in artifact:
|
if "stats" in artifact:
|
||||||
# Link to the GitHub Action job
|
# Link to the GitHub Action job
|
||||||
model_results[model]["job_link"] = github_actions_job_links.get(
|
model_results[model]["job_link"] = github_actions_job_links.get(
|
||||||
f"Model tests ({model}, {artifact_path['gpu']}-gpu-docker)"
|
f"Model tests ({model}, {artifact_path['gpu']}-gpu)"
|
||||||
)
|
)
|
||||||
|
|
||||||
failed, success, time_spent = handle_test_results(artifact["stats"])
|
failed, success, time_spent = handle_test_results(artifact["stats"])
|
||||||
@@ -667,6 +671,11 @@ if __name__ == "__main__":
|
|||||||
"Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports",
|
"Torch CUDA extension tests": "run_tests_torch_cuda_extensions_gpu_test_reports",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ci_event == "push":
|
||||||
|
del additional_files["Examples directory"]
|
||||||
|
del additional_files["PyTorch pipelines"]
|
||||||
|
del additional_files["TensorFlow pipelines"]
|
||||||
|
|
||||||
additional_results = {
|
additional_results = {
|
||||||
key: {
|
key: {
|
||||||
"failed": {"unclassified": 0, "single": 0, "multi": 0},
|
"failed": {"unclassified": 0, "single": 0, "multi": 0},
|
||||||
@@ -689,7 +698,7 @@ if __name__ == "__main__":
|
|||||||
for artifact_path in available_artifacts[additional_files[key]].paths:
|
for artifact_path in available_artifacts[additional_files[key]].paths:
|
||||||
if artifact_path["gpu"] is not None:
|
if artifact_path["gpu"] is not None:
|
||||||
additional_results[key]["job_link"] = github_actions_job_links.get(
|
additional_results[key]["job_link"] = github_actions_job_links.get(
|
||||||
f"{key} ({artifact_path['gpu']}-gpu-docker)"
|
f"{key} ({artifact_path['gpu']}-gpu)"
|
||||||
)
|
)
|
||||||
artifact = retrieve_artifact(artifact_path["name"], artifact_path["gpu"])
|
artifact = retrieve_artifact(artifact_path["name"], artifact_path["gpu"])
|
||||||
stacktraces = handle_stacktraces(artifact["failures_line"])
|
stacktraces = handle_stacktraces(artifact["failures_line"])
|
||||||
@@ -715,7 +724,7 @@ if __name__ == "__main__":
|
|||||||
artifact_path["gpu"]
|
artifact_path["gpu"]
|
||||||
] += f"*{line}*\n_{stacktraces.pop(0)}_\n\n"
|
] += f"*{line}*\n_{stacktraces.pop(0)}_\n\n"
|
||||||
|
|
||||||
message = Message("🤗 Results of the scheduled tests.", model_results, additional_results)
|
message = Message(f"🤗 Results of the {ci_event} tests.", model_results, additional_results)
|
||||||
|
|
||||||
message.post()
|
message.post()
|
||||||
message.post_reply()
|
message.post_reply()
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import collections
|
import collections
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
@@ -65,6 +66,32 @@ def clean_code(content):
|
|||||||
return "\n".join(lines_to_keep)
|
return "\n".join(lines_to_keep)
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_tests():
|
||||||
|
"""
|
||||||
|
Return a list of paths to all test folders and files under `tests`. All paths are rooted at `tests`.
|
||||||
|
|
||||||
|
- folders under `tests`: `tokenization`, `pipelines`, etc. The folder `models` is excluded.
|
||||||
|
- folders under `tests/models`: `bert`, `gpt2`, etc.
|
||||||
|
- test files under `tests`: `test_modeling_common.py`, `test_tokenization_common.py`, etc.
|
||||||
|
"""
|
||||||
|
test_root_dir = os.path.join(PATH_TO_TRANFORMERS, "tests")
|
||||||
|
|
||||||
|
# test folders/files directly under `tests` folder
|
||||||
|
tests = os.listdir(test_root_dir)
|
||||||
|
tests = sorted(
|
||||||
|
list(filter(lambda x: os.path.isdir(x) or x.startswith("tests/test_"), [f"tests/{x}" for x in tests]))
|
||||||
|
)
|
||||||
|
|
||||||
|
# model specific test folders
|
||||||
|
model_tests_folders = os.listdir(os.path.join(test_root_dir, "models"))
|
||||||
|
model_test_folders = sorted(list(filter(os.path.isdir, [f"tests/models/{x}" for x in model_tests_folders])))
|
||||||
|
|
||||||
|
tests.remove("tests/models")
|
||||||
|
tests = model_test_folders + tests
|
||||||
|
|
||||||
|
return tests
|
||||||
|
|
||||||
|
|
||||||
def diff_is_docstring_only(repo, branching_point, filename):
|
def diff_is_docstring_only(repo, branching_point, filename):
|
||||||
"""
|
"""
|
||||||
Check if the diff is only in docstrings in a filename.
|
Check if the diff is only in docstrings in a filename.
|
||||||
@@ -441,7 +468,7 @@ def sanity_check():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None):
|
def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None, json_output_file=None):
|
||||||
modified_files = get_modified_python_files(diff_with_last_commit=diff_with_last_commit)
|
modified_files = get_modified_python_files(diff_with_last_commit=diff_with_last_commit)
|
||||||
print(f"\n### MODIFIED FILES ###\n{_print_list(modified_files)}")
|
print(f"\n### MODIFIED FILES ###\n{_print_list(modified_files)}")
|
||||||
|
|
||||||
@@ -495,6 +522,42 @@ def infer_tests_to_run(output_file, diff_with_last_commit=False, filters=None):
|
|||||||
with open(output_file, "w", encoding="utf-8") as f:
|
with open(output_file, "w", encoding="utf-8") as f:
|
||||||
f.write(" ".join(test_files_to_run))
|
f.write(" ".join(test_files_to_run))
|
||||||
|
|
||||||
|
# Create a map that maps test categories to test files, i.e. `models/bert` -> [...test_modeling_bert.py, ...]
|
||||||
|
|
||||||
|
# Get all test directories (and some common test files) under `tests` and `tests/models` if `test_files_to_run`
|
||||||
|
# contains `tests` (i.e. when `setup.py` is changed).
|
||||||
|
if "tests" in test_files_to_run:
|
||||||
|
test_files_to_run = get_all_tests()
|
||||||
|
|
||||||
|
if json_output_file is not None:
|
||||||
|
test_map = {}
|
||||||
|
for test_file in test_files_to_run:
|
||||||
|
# `test_file` is a path to a test folder/file, starting with `tests/`. For example,
|
||||||
|
# - `tests/models/bert/test_modeling_bert.py` or `tests/models/bert`
|
||||||
|
# - `tests/trainer/test_trainer.py` or `tests/trainer`
|
||||||
|
# - `tests/test_modeling_common.py`
|
||||||
|
names = test_file.split(os.path.sep)
|
||||||
|
if names[1] == "models":
|
||||||
|
# take the part like `models/bert` for modeling tests
|
||||||
|
key = "/".join(names[1:3])
|
||||||
|
elif len(names) > 2 or not test_file.endswith(".py"):
|
||||||
|
# test folders under `tests` or python files under them
|
||||||
|
# take the part like tokenization, `pipeline`, etc. for other test categories
|
||||||
|
key = "/".join(names[1:2])
|
||||||
|
else:
|
||||||
|
# common test files directly under `tests/`
|
||||||
|
key = "common"
|
||||||
|
|
||||||
|
if key not in test_map:
|
||||||
|
test_map[key] = []
|
||||||
|
test_map[key].append(test_file)
|
||||||
|
|
||||||
|
# sort the keys & values
|
||||||
|
keys = sorted(test_map.keys())
|
||||||
|
test_map = {k: " ".join(sorted(test_map[k])) for k in keys}
|
||||||
|
with open(json_output_file, "w", encoding="UTF-8") as fp:
|
||||||
|
json.dump(test_map, fp, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
@@ -504,6 +567,12 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--output_file", type=str, default="test_list.txt", help="Where to store the list of tests to run"
|
"--output_file", type=str, default="test_list.txt", help="Where to store the list of tests to run"
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--json_output_file",
|
||||||
|
type=str,
|
||||||
|
default="test_map.json",
|
||||||
|
help="Where to store the tests to run in a dictionary format mapping test categories to test files",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--diff_with_last_commit",
|
"--diff_with_last_commit",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -528,7 +597,12 @@ if __name__ == "__main__":
|
|||||||
diff_with_last_commit = True
|
diff_with_last_commit = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
infer_tests_to_run(args.output_file, diff_with_last_commit=diff_with_last_commit, filters=args.filters)
|
infer_tests_to_run(
|
||||||
|
args.output_file,
|
||||||
|
diff_with_last_commit=diff_with_last_commit,
|
||||||
|
filters=args.filters,
|
||||||
|
json_output_file=args.json_output_file,
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"\nError when trying to grab the relevant tests: {e}\n\nRunning all tests.")
|
print(f"\nError when trying to grab the relevant tests: {e}\n\nRunning all tests.")
|
||||||
with open(args.output_file, "w", encoding="utf-8") as f:
|
with open(args.output_file, "w", encoding="utf-8") as f:
|
||||||
|
|||||||
Reference in New Issue
Block a user