Tests run on Docker (#10681)
* Tests run on Docker

  Co-authored-by: Morgan <funtowiczmo@gmail.com>

* Comments from code review
* Reply to itself
* Dependencies

  Co-authored-by: Morgan <funtowiczmo@gmail.com>
This commit is contained in:
252
.github/workflows/self-push.yml
vendored
252
.github/workflows/self-push.yml
vendored
@@ -10,73 +10,42 @@ on:
|
|||||||
- "tests/**"
|
- "tests/**"
|
||||||
- ".github/**"
|
- ".github/**"
|
||||||
- "templates/**"
|
- "templates/**"
|
||||||
# pull_request:
|
|
||||||
repository_dispatch:
|
repository_dispatch:
|
||||||
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_tests_torch_gpu:
|
run_tests_torch_gpu:
|
||||||
runs-on: [self-hosted, gpu, single-gpu]
|
runs-on: [self-hosted, docker-gpu, single-gpu]
|
||||||
|
container:
|
||||||
|
image: pytorch/pytorch:1.8.0-cuda11.1-cudnn8-runtime
|
||||||
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- name: Launcher docker
|
||||||
- name: Python version
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
run: |
|
run: |
|
||||||
which python
|
nvidia-smi
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Current dir
|
|
||||||
run: pwd
|
|
||||||
|
|
||||||
- run: nvidia-smi
|
|
||||||
|
|
||||||
- name: Kill any run-away pytest processes
|
|
||||||
run: (pkill -f tests; pkill -f examples) || echo "no zombies"
|
|
||||||
|
|
||||||
- name: Loading cache.
|
|
||||||
uses: actions/cache@v2
|
|
||||||
id: cache
|
|
||||||
with:
|
|
||||||
path: .env
|
|
||||||
key: v1.2-tests_torch_gpu-${{ hashFiles('setup.py') }}
|
|
||||||
|
|
||||||
- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
|
|
||||||
run: |
|
|
||||||
python -m venv .env
|
|
||||||
source .env/bin/activate
|
|
||||||
which python
|
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
apt -y update && apt install -y libsndfile1-dev
|
||||||
sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
|
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install .[torch,sklearn,testing,onnxruntime,sentencepiece,speech]
|
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech]
|
||||||
pip install git+https://github.com/huggingface/datasets
|
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
|
python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
|
||||||
|
python -c "import torch; print('Cuda version:', torch.version.cuda)"
|
||||||
|
python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())"
|
||||||
python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
|
python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
|
||||||
|
|
||||||
# - name: Create model files
|
|
||||||
# run: |
|
|
||||||
# source .env/bin/activate
|
|
||||||
# transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
|
||||||
# transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
|
||||||
# transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
|
|
||||||
# transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
|
||||||
|
|
||||||
- name: Run all non-slow tests on GPU
|
- name: Run all non-slow tests on GPU
|
||||||
env:
|
env:
|
||||||
OMP_NUM_THREADS: 1
|
OMP_NUM_THREADS: 8
|
||||||
CUDA_VISIBLE_DEVICES: 0
|
MKL_NUM_THREADS: 8
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_gpu tests
|
||||||
python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
@@ -89,68 +58,38 @@ jobs:
|
|||||||
name: run_all_tests_torch_gpu_test_reports
|
name: run_all_tests_torch_gpu_test_reports
|
||||||
path: reports
|
path: reports
|
||||||
|
|
||||||
|
|
||||||
run_tests_tf_gpu:
|
run_tests_tf_gpu:
|
||||||
runs-on: [self-hosted, gpu, single-gpu]
|
runs-on: [self-hosted, docker-gpu, single-gpu]
|
||||||
|
container:
|
||||||
|
image: tensorflow/tensorflow:2.4.1-gpu
|
||||||
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- name: Launcher docker
|
||||||
- name: Python version
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
run: |
|
run: |
|
||||||
which python
|
nvidia-smi
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Current dir
|
|
||||||
run: pwd
|
|
||||||
|
|
||||||
- run: nvidia-smi
|
|
||||||
|
|
||||||
- name: Kill any run-away pytest processes
|
|
||||||
run: (pkill -f tests; pkill -f examples) || echo "no zombies"
|
|
||||||
|
|
||||||
- name: Loading cache.
|
|
||||||
uses: actions/cache@v2
|
|
||||||
id: cache
|
|
||||||
with:
|
|
||||||
path: .env
|
|
||||||
key: v1.2-tests_tf_gpu-${{ hashFiles('setup.py') }}
|
|
||||||
|
|
||||||
- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
|
|
||||||
run: |
|
|
||||||
python -m venv .env
|
|
||||||
source .env/bin/activate
|
|
||||||
which python
|
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install .[tf,sklearn,testing,onnxruntime,sentencepiece]
|
pip install .[sklearn,testing,onnxruntime,sentencepiece]
|
||||||
pip install git+https://github.com/huggingface/datasets
|
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
||||||
|
|
||||||
- name: Create model files
|
|
||||||
run: |
|
|
||||||
source .env/bin/activate
|
|
||||||
# transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
|
||||||
# transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
|
||||||
# transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
|
|
||||||
# transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
|
|
||||||
|
|
||||||
- name: Run all non-slow tests on GPU
|
- name: Run all non-slow tests on GPU
|
||||||
env:
|
env:
|
||||||
OMP_NUM_THREADS: 1
|
OMP_NUM_THREADS: 8
|
||||||
CUDA_VISIBLE_DEVICES: 0
|
MKL_NUM_THREADS: 8
|
||||||
|
TF_NUM_INTRAOP_THREADS: 8
|
||||||
|
TF_NUM_INTEROP_THREADS: 1
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_gpu tests
|
||||||
python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
@@ -163,58 +102,41 @@ jobs:
|
|||||||
name: run_all_tests_tf_gpu_test_reports
|
name: run_all_tests_tf_gpu_test_reports
|
||||||
path: reports
|
path: reports
|
||||||
|
|
||||||
|
|
||||||
run_tests_torch_multi_gpu:
|
run_tests_torch_multi_gpu:
|
||||||
runs-on: [self-hosted, gpu, multi-gpu]
|
runs-on: [self-hosted, docker-gpu, multi-gpu]
|
||||||
|
container:
|
||||||
|
image: pytorch/pytorch:1.8.0-cuda11.1-cudnn8-runtime
|
||||||
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- name: Launcher docker
|
||||||
- name: Python version
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
run: |
|
run: |
|
||||||
which python
|
nvidia-smi
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Current dir
|
|
||||||
run: pwd
|
|
||||||
|
|
||||||
- run: nvidia-smi
|
|
||||||
|
|
||||||
- name: Kill any run-away pytest processes
|
|
||||||
run: (pkill -f tests; pkill -f examples) || echo "no zombies"
|
|
||||||
|
|
||||||
- name: Loading cache.
|
|
||||||
uses: actions/cache@v2
|
|
||||||
id: cache
|
|
||||||
with:
|
|
||||||
path: .env
|
|
||||||
key: v1.2-tests_torch_multi_gpu-${{ hashFiles('setup.py') }}
|
|
||||||
|
|
||||||
- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
|
|
||||||
run: |
|
|
||||||
python -m venv .env
|
|
||||||
source .env/bin/activate
|
|
||||||
which python
|
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
apt -y update && apt install -y libsndfile1-dev
|
||||||
sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
|
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install .[torch,sklearn,testing,onnxruntime,sentencepiece,speech]
|
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech]
|
||||||
pip install git+https://github.com/huggingface/datasets
|
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
|
python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
|
||||||
|
python -c "import torch; print('Cuda version:', torch.version.cuda)"
|
||||||
|
python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())"
|
||||||
python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
|
python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
|
||||||
|
|
||||||
- name: Run all non-slow tests on GPU
|
- name: Run all non-slow tests on GPU
|
||||||
env:
|
env:
|
||||||
OMP_NUM_THREADS: 1
|
OMP_NUM_THREADS: 8
|
||||||
|
MKL_NUM_THREADS: 8
|
||||||
|
MKL_SERVICE_FORCE_INTEL: 1
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 2 --dist=loadfile --make-reports=tests_torch_multi_gpu tests
|
||||||
python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
@@ -228,56 +150,37 @@ jobs:
|
|||||||
path: reports
|
path: reports
|
||||||
|
|
||||||
run_tests_tf_multi_gpu:
|
run_tests_tf_multi_gpu:
|
||||||
runs-on: [self-hosted, gpu, multi-gpu]
|
runs-on: [self-hosted, docker-gpu, multi-gpu]
|
||||||
|
container:
|
||||||
|
image: tensorflow/tensorflow:2.4.1-gpu
|
||||||
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- name: Launcher docker
|
||||||
- name: Python version
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: NVIDIA-SMI
|
||||||
run: |
|
run: |
|
||||||
which python
|
nvidia-smi
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Current dir
|
|
||||||
run: pwd
|
|
||||||
|
|
||||||
- run: nvidia-smi
|
|
||||||
|
|
||||||
- name: Kill any run-away pytest processes
|
|
||||||
run: (pkill -f tests; pkill -f examples) || echo "no zombies"
|
|
||||||
|
|
||||||
- name: Loading cache.
|
|
||||||
uses: actions/cache@v2
|
|
||||||
id: cache
|
|
||||||
with:
|
|
||||||
path: .env
|
|
||||||
key: v1.2-tests_tf_multi_gpu-${{ hashFiles('setup.py') }}
|
|
||||||
|
|
||||||
- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
|
|
||||||
run: |
|
|
||||||
python -m venv .env
|
|
||||||
source .env/bin/activate
|
|
||||||
which python
|
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install .[tf,sklearn,testing,onnxruntime,sentencepiece]
|
pip install .[sklearn,testing,onnxruntime,sentencepiece]
|
||||||
pip install git+https://github.com/huggingface/datasets
|
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
||||||
|
|
||||||
- name: Run all non-slow tests on GPU
|
- name: Run all non-slow tests on GPU
|
||||||
env:
|
env:
|
||||||
OMP_NUM_THREADS: 1
|
OMP_NUM_THREADS: 8
|
||||||
|
MKL_NUM_THREADS: 8
|
||||||
|
TF_NUM_INTRAOP_THREADS: 8
|
||||||
|
TF_NUM_INTEROP_THREADS: 1
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
|
||||||
python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
@@ -289,3 +192,22 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
name: run_all_tests_tf_multi_gpu_test_reports
|
name: run_all_tests_tf_multi_gpu_test_reports
|
||||||
path: reports
|
path: reports
|
||||||
|
|
||||||
|
send_results:
|
||||||
|
name: Send results to webhook
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: always()
|
||||||
|
needs: [run_tests_torch_gpu, run_tests_tf_gpu, run_tests_torch_multi_gpu, run_tests_tf_multi_gpu]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- uses: actions/download-artifact@v2
|
||||||
|
|
||||||
|
- name: Send message to Slack
|
||||||
|
env:
|
||||||
|
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||||
|
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||||
|
|
||||||
|
run: |
|
||||||
|
pip install slack_sdk
|
||||||
|
python utils/notification_service.py push
|
||||||
311
.github/workflows/self-scheduled.yml
vendored
311
.github/workflows/self-scheduled.yml
vendored
@@ -1,8 +1,3 @@
|
|||||||
# configuration notes:
|
|
||||||
#
|
|
||||||
# - `source .env/bin/activate` is currently needed to be run first thing first in each step. Otherwise
|
|
||||||
# the step uses the system-wide python interpreter.
|
|
||||||
|
|
||||||
name: Self-hosted runner (scheduled)
|
name: Self-hosted runner (scheduled)
|
||||||
|
|
||||||
on:
|
on:
|
||||||
@@ -15,61 +10,39 @@ on:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_all_tests_torch_gpu:
|
run_all_tests_torch_gpu:
|
||||||
runs-on: [self-hosted, gpu, single-gpu]
|
runs-on: [self-hosted, docker-gpu, single-gpu]
|
||||||
|
container:
|
||||||
|
image: pytorch/pytorch:1.8.0-cuda11.1-cudnn8-runtime
|
||||||
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- name: Launcher docker
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Loading cache.
|
- name: NVIDIA-SMI
|
||||||
uses: actions/cache@v2
|
|
||||||
id: cache
|
|
||||||
with:
|
|
||||||
path: .env
|
|
||||||
key: v 1.2-slow_tests_torch_gpu-${{ hashFiles('setup.py') }}
|
|
||||||
|
|
||||||
- name: Python version
|
|
||||||
run: |
|
run: |
|
||||||
which python
|
nvidia-smi
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Current dir
|
|
||||||
run: pwd
|
|
||||||
|
|
||||||
- run: nvidia-smi
|
|
||||||
|
|
||||||
- name: Kill any run-away pytest processes
|
|
||||||
run: (pkill -f tests; pkill -f examples) || echo "no zombies"
|
|
||||||
|
|
||||||
- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
|
|
||||||
if: steps.cache.outputs.cache-hit != 'true'
|
|
||||||
run: |
|
|
||||||
python -m venv .env
|
|
||||||
source .env/bin/activate
|
|
||||||
which python
|
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
apt -y update && apt install -y libsndfile1-dev
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install .[torch,sklearn,testing,onnxruntime,sentencepiece]
|
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech]
|
||||||
pip install git+https://github.com/huggingface/datasets
|
|
||||||
pip list
|
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
|
python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
|
||||||
|
python -c "import torch; print('Cuda version:', torch.version.cuda)"
|
||||||
|
python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())"
|
||||||
python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
|
python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
env:
|
env:
|
||||||
OMP_NUM_THREADS: 1
|
OMP_NUM_THREADS: 16
|
||||||
|
MKL_NUM_THREADS: 16
|
||||||
RUN_SLOW: yes
|
RUN_SLOW: yes
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_gpu tests
|
||||||
python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
@@ -78,12 +51,13 @@ jobs:
|
|||||||
- name: Run examples tests on GPU
|
- name: Run examples tests on GPU
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
env:
|
env:
|
||||||
OMP_NUM_THREADS: 1
|
OMP_NUM_THREADS: 16
|
||||||
|
MKL_NUM_THREADS: 16
|
||||||
RUN_SLOW: yes
|
RUN_SLOW: yes
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
pip install -r examples/_tests_requirements.txt
|
pip install -r examples/_tests_requirements.txt
|
||||||
python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_gpu examples
|
python -m pytest -n 1 --dist=loadfile --make-reports=examples_torch_gpu examples
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
@@ -92,13 +66,13 @@ jobs:
|
|||||||
- name: Run all pipeline tests on GPU
|
- name: Run all pipeline tests on GPU
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
env:
|
env:
|
||||||
TF_FORCE_GPU_ALLOW_GROWTH: "true"
|
OMP_NUM_THREADS: 16
|
||||||
OMP_NUM_THREADS: 1
|
MKL_NUM_THREADS: 16
|
||||||
RUN_SLOW: yes
|
RUN_SLOW: yes
|
||||||
RUN_PIPELINE_TESTS: yes
|
RUN_PIPELINE_TESTS: yes
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests
|
||||||
python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
@@ -111,64 +85,39 @@ jobs:
|
|||||||
name: run_all_tests_torch_gpu_test_reports
|
name: run_all_tests_torch_gpu_test_reports
|
||||||
path: reports
|
path: reports
|
||||||
|
|
||||||
|
|
||||||
run_all_tests_tf_gpu:
|
run_all_tests_tf_gpu:
|
||||||
runs-on: [self-hosted, gpu, single-gpu]
|
runs-on: [self-hosted, docker-gpu, single-gpu]
|
||||||
|
container:
|
||||||
|
image: tensorflow/tensorflow:2.4.1-gpu
|
||||||
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- name: Launcher docker
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Loading cache.
|
- name: NVIDIA-SMI
|
||||||
uses: actions/cache@v2
|
|
||||||
id: cache
|
|
||||||
with:
|
|
||||||
path: .env
|
|
||||||
key: v1.2-slow_tests_tf_gpu-${{ hashFiles('setup.py') }}
|
|
||||||
|
|
||||||
- name: Python version
|
|
||||||
run: |
|
run: |
|
||||||
which python
|
nvidia-smi
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Current dir
|
|
||||||
run: pwd
|
|
||||||
|
|
||||||
- run: nvidia-smi
|
|
||||||
|
|
||||||
- name: Kill any run-away pytest processes
|
|
||||||
run: (pkill -f tests; pkill -f examples) || echo "no zombies"
|
|
||||||
|
|
||||||
|
|
||||||
- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
|
|
||||||
if: steps.cache.outputs.cache-hit != 'true'
|
|
||||||
run: |
|
|
||||||
python -m venv .env
|
|
||||||
source .env/bin/activate
|
|
||||||
which python
|
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install .[tf,sklearn,testing,onnxruntime,sentencepiece]
|
pip install .[sklearn,testing,onnx,sentencepiece]
|
||||||
pip install git+https://github.com/huggingface/datasets
|
|
||||||
pip list
|
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
||||||
|
|
||||||
- name: Run all tests on GPU
|
- name: Run all tests on GPU
|
||||||
env:
|
env:
|
||||||
OMP_NUM_THREADS: 1
|
|
||||||
RUN_SLOW: yes
|
RUN_SLOW: yes
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
|
OMP_NUM_THREADS: 16
|
||||||
|
TF_NUM_INTEROP_THREADS: 1
|
||||||
|
TF_NUM_INTRAOP_THREADS: 16
|
||||||
|
MKL_NUM_THREADS: 16
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests
|
||||||
python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
@@ -177,17 +126,19 @@ jobs:
|
|||||||
- name: Run all pipeline tests on GPU
|
- name: Run all pipeline tests on GPU
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
env:
|
env:
|
||||||
TF_FORCE_GPU_ALLOW_GROWTH: "true"
|
|
||||||
OMP_NUM_THREADS: 1
|
|
||||||
RUN_SLOW: yes
|
RUN_SLOW: yes
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
|
OMP_NUM_THREADS: 16
|
||||||
RUN_PIPELINE_TESTS: yes
|
RUN_PIPELINE_TESTS: yes
|
||||||
|
TF_NUM_INTEROP_THREADS: 1
|
||||||
|
TF_NUM_INTRAOP_THREADS: 16
|
||||||
|
MKL_NUM_THREADS: 16
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_gpu tests
|
||||||
python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipelines_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
run: cat reports/tests_tf_pipelines_gpu_failures_short.txt
|
run: cat reports/tests_tf_pipeline_gpu_failures_short.txt
|
||||||
|
|
||||||
- name: Test suite reports artifacts
|
- name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
@@ -197,92 +148,55 @@ jobs:
|
|||||||
path: reports
|
path: reports
|
||||||
|
|
||||||
run_all_tests_torch_multi_gpu:
|
run_all_tests_torch_multi_gpu:
|
||||||
runs-on: [self-hosted, gpu, multi-gpu]
|
runs-on: [self-hosted, docker-gpu, multi-gpu]
|
||||||
|
container:
|
||||||
|
image: pytorch/pytorch:1.8.0-cuda11.1-cudnn8-runtime
|
||||||
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- name: Launcher docker
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Loading cache.
|
- name: NVIDIA-SMI
|
||||||
uses: actions/cache@v2
|
|
||||||
id: cache
|
|
||||||
with:
|
|
||||||
path: .env
|
|
||||||
key: v1.2-slow_tests_torch_multi_gpu-${{ hashFiles('setup.py') }}
|
|
||||||
|
|
||||||
- name: Python version
|
|
||||||
run: |
|
run: |
|
||||||
which python
|
nvidia-smi
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Current dir
|
|
||||||
run: pwd
|
|
||||||
|
|
||||||
- run: nvidia-smi
|
|
||||||
|
|
||||||
- name: Kill any run-away pytest processes
|
|
||||||
run: (pkill -f tests; pkill -f examples) || echo "no zombies"
|
|
||||||
|
|
||||||
- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
|
|
||||||
if: steps.cache.outputs.cache-hit != 'true'
|
|
||||||
run: |
|
|
||||||
python -m venv .env
|
|
||||||
source .env/bin/activate
|
|
||||||
which python
|
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
apt -y update && apt install -y libsndfile1-dev
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install .[torch,sklearn,testing,onnxruntime,sentencepiece]
|
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech]
|
||||||
pip install git+https://github.com/huggingface/datasets
|
|
||||||
pip install fairscale
|
|
||||||
pip install deepspeed
|
|
||||||
pip list
|
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
|
python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
|
||||||
|
python -c "import torch; print('Cuda version:', torch.version.cuda)"
|
||||||
|
python -c "import torch; print('CuDNN version:', torch.backends.cudnn.version())"
|
||||||
python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
|
python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
|
||||||
|
|
||||||
- name: Run all tests on multi-GPU
|
- name: Run all tests on GPU
|
||||||
env:
|
env:
|
||||||
OMP_NUM_THREADS: 1
|
|
||||||
RUN_SLOW: yes
|
RUN_SLOW: yes
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
|
OMP_NUM_THREADS: 16
|
||||||
|
MKL_NUM_THREADS: 16
|
||||||
|
MKL_SERVICE_FORCE_INTEL: 1
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 1 --dist=loadfile --make-reports=tests_torch_multi_gpu tests
|
||||||
python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
run: cat reports/tests_torch_multi_gpu_failures_short.txt
|
run: cat reports/tests_torch_multi_gpu_failures_short.txt
|
||||||
|
|
||||||
- name: Run examples tests on multi-GPU
|
- name: Run all pipeline tests on GPU
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
env:
|
env:
|
||||||
OMP_NUM_THREADS: 1
|
OMP_NUM_THREADS: 16
|
||||||
RUN_SLOW: yes
|
MKL_NUM_THREADS: 16
|
||||||
run: |
|
|
||||||
source .env/bin/activate
|
|
||||||
pip install -r examples/_tests_requirements.txt
|
|
||||||
python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_examples_multi_gpu examples
|
|
||||||
|
|
||||||
- name: Failure short reports
|
|
||||||
if: ${{ always() }}
|
|
||||||
run: cat reports/tests_torch_examples_multi_gpu_failures_short.txt
|
|
||||||
|
|
||||||
- name: Run all pipeline tests on multi-GPU
|
|
||||||
if: ${{ always() }}
|
|
||||||
env:
|
|
||||||
TF_FORCE_GPU_ALLOW_GROWTH: "true"
|
|
||||||
OMP_NUM_THREADS: 1
|
|
||||||
RUN_SLOW: yes
|
RUN_SLOW: yes
|
||||||
RUN_PIPELINE_TESTS: yes
|
RUN_PIPELINE_TESTS: yes
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests
|
||||||
python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
@@ -296,76 +210,55 @@ jobs:
|
|||||||
path: reports
|
path: reports
|
||||||
|
|
||||||
run_all_tests_tf_multi_gpu:
|
run_all_tests_tf_multi_gpu:
|
||||||
runs-on: [self-hosted, gpu, multi-gpu]
|
runs-on: [self-hosted, docker-gpu, multi-gpu]
|
||||||
|
container:
|
||||||
|
image: tensorflow/tensorflow:2.4.1-gpu
|
||||||
|
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- name: Launcher docker
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
- name: Loading cache.
|
- name: NVIDIA-SMI
|
||||||
uses: actions/cache@v2
|
|
||||||
id: cache
|
|
||||||
with:
|
|
||||||
path: .env
|
|
||||||
key: v1.2-slow_tests_tf_multi_gpu-${{ hashFiles('setup.py') }}
|
|
||||||
|
|
||||||
- name: Python version
|
|
||||||
run: |
|
run: |
|
||||||
which python
|
nvidia-smi
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Current dir
|
|
||||||
run: pwd
|
|
||||||
|
|
||||||
- run: nvidia-smi
|
|
||||||
|
|
||||||
- name: Kill any run-away pytest processes
|
|
||||||
run: (pkill -f tests; pkill -f examples) || echo "no zombies"
|
|
||||||
|
|
||||||
- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
|
|
||||||
if: steps.cache.outputs.cache-hit != 'true'
|
|
||||||
run: |
|
|
||||||
python -m venv .env
|
|
||||||
source .env/bin/activate
|
|
||||||
which python
|
|
||||||
python --version
|
|
||||||
pip --version
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install .[tf,sklearn,testing,onnxruntime,sentencepiece]
|
pip install .[sklearn,testing,onnx,sentencepiece]
|
||||||
pip install git+https://github.com/huggingface/datasets
|
|
||||||
pip list
|
|
||||||
|
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
||||||
|
|
||||||
- name: Run all tests on multi-GPU
|
- name: Run all tests on GPU
|
||||||
env:
|
env:
|
||||||
OMP_NUM_THREADS: 1
|
OMP_NUM_THREADS: 16
|
||||||
RUN_SLOW: yes
|
RUN_SLOW: yes
|
||||||
|
MKL_NUM_THREADS: 16
|
||||||
|
TF_NUM_INTEROP_THREADS: 1
|
||||||
|
TF_NUM_INTRAOP_THREADS: 16
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
|
||||||
python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
run: cat reports/tests_tf_multi_gpu_failures_short.txt
|
run: cat reports/tests_tf_multi_gpu_failures_short.txt
|
||||||
|
|
||||||
- name: Run all pipeline tests on multi-GPU
|
- name: Run all pipeline tests on GPU
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
env:
|
env:
|
||||||
TF_FORCE_GPU_ALLOW_GROWTH: "true"
|
OMP_NUM_THREADS: 16
|
||||||
OMP_NUM_THREADS: 1
|
|
||||||
RUN_SLOW: yes
|
RUN_SLOW: yes
|
||||||
RUN_PIPELINE_TESTS: yes
|
RUN_PIPELINE_TESTS: yes
|
||||||
|
MKL_NUM_THREADS: 16
|
||||||
|
TF_NUM_INTEROP_THREADS: 1
|
||||||
|
TF_NUM_INTRAOP_THREADS: 16
|
||||||
|
HF_HOME: /mnt/cache
|
||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
python -m pytest -n 1 --dist=loadfile -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests
|
||||||
python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipeline_multi_gpu tests
|
|
||||||
|
|
||||||
- name: Failure short reports
|
- name: Failure short reports
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
@@ -377,3 +270,23 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
name: run_all_tests_tf_multi_gpu_test_reports
|
name: run_all_tests_tf_multi_gpu_test_reports
|
||||||
path: reports
|
path: reports
|
||||||
|
|
||||||
|
send_results:
|
||||||
|
name: Send results to webhook
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
if: always()
|
||||||
|
needs: [run_all_tests_torch_gpu, run_all_tests_tf_gpu, run_all_tests_torch_multi_gpu, run_all_tests_tf_multi_gpu]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- uses: actions/download-artifact@v2
|
||||||
|
|
||||||
|
- name: Send message to Slack
|
||||||
|
env:
|
||||||
|
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||||
|
CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
|
||||||
|
|
||||||
|
|
||||||
|
run: |
|
||||||
|
pip install slack_sdk
|
||||||
|
python utils/notification_service.py scheduled
|
||||||
|
|||||||
4
setup.py
4
setup.py
@@ -115,6 +115,7 @@ _deps = [
|
|||||||
"psutil",
|
"psutil",
|
||||||
"pydantic",
|
"pydantic",
|
||||||
"pytest",
|
"pytest",
|
||||||
|
"pytest-sugar",
|
||||||
"pytest-xdist",
|
"pytest-xdist",
|
||||||
"python>=3.6.0",
|
"python>=3.6.0",
|
||||||
"recommonmark",
|
"recommonmark",
|
||||||
@@ -225,6 +226,7 @@ else:
|
|||||||
|
|
||||||
extras["tokenizers"] = deps_list("tokenizers")
|
extras["tokenizers"] = deps_list("tokenizers")
|
||||||
extras["onnxruntime"] = deps_list("onnxruntime", "onnxruntime-tools")
|
extras["onnxruntime"] = deps_list("onnxruntime", "onnxruntime-tools")
|
||||||
|
extras["onnx"] = deps_list("onnxconverter-common", "keras2onnx") + extras["onnxruntime"]
|
||||||
extras["modelcreation"] = deps_list("cookiecutter")
|
extras["modelcreation"] = deps_list("cookiecutter")
|
||||||
|
|
||||||
extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")
|
extras["serving"] = deps_list("pydantic", "uvicorn", "fastapi", "starlette")
|
||||||
@@ -232,7 +234,7 @@ extras["speech"] = deps_list("soundfile", "torchaudio")
|
|||||||
|
|
||||||
extras["sentencepiece"] = deps_list("sentencepiece", "protobuf")
|
extras["sentencepiece"] = deps_list("sentencepiece", "protobuf")
|
||||||
extras["testing"] = (
|
extras["testing"] = (
|
||||||
deps_list("pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets")
|
deps_list("pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-sugar")
|
||||||
+ extras["retrieval"]
|
+ extras["retrieval"]
|
||||||
+ extras["modelcreation"]
|
+ extras["modelcreation"]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ deps = {
|
|||||||
"psutil": "psutil",
|
"psutil": "psutil",
|
||||||
"pydantic": "pydantic",
|
"pydantic": "pydantic",
|
||||||
"pytest": "pytest",
|
"pytest": "pytest",
|
||||||
|
"pytest-sugar": "pytest-sugar",
|
||||||
"pytest-xdist": "pytest-xdist",
|
"pytest-xdist": "pytest-xdist",
|
||||||
"python": "python>=3.6.0",
|
"python": "python>=3.6.0",
|
||||||
"recommonmark": "recommonmark",
|
"recommonmark": "recommonmark",
|
||||||
|
|||||||
@@ -137,6 +137,17 @@ def slow(test_case):
|
|||||||
return test_case
|
return test_case
|
||||||
|
|
||||||
|
|
||||||
|
def tooslow(test_case):
    """
    Decorator marking a test as too slow.

    A test carrying this marker is skipped unconditionally with the reason "test is too slow". The marker is meant
    to be temporary: no test should stay tagged as "tooslow", because tagged tests are never exercised by the CI.
    """
    # Build the skip decorator first, then apply it to the decorated test.
    skip_marker = unittest.skip("test is too slow")
    return skip_marker(test_case)
|
||||||
|
|
||||||
|
|
||||||
def custom_tokenizers(test_case):
|
def custom_tokenizers(test_case):
|
||||||
"""
|
"""
|
||||||
Decorator marking a test for a custom tokenizer.
|
Decorator marking a test for a custom tokenizer.
|
||||||
|
|||||||
@@ -25,7 +25,14 @@ from importlib import import_module
|
|||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
from transformers import is_tf_available
|
from transformers import is_tf_available
|
||||||
from transformers.testing_utils import _tf_gpu_memory_limit, is_pt_tf_cross_test, require_onnx, require_tf, slow
|
from transformers.testing_utils import (
|
||||||
|
_tf_gpu_memory_limit,
|
||||||
|
is_pt_tf_cross_test,
|
||||||
|
require_onnx,
|
||||||
|
require_tf,
|
||||||
|
slow,
|
||||||
|
tooslow,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if is_tf_available():
|
if is_tf_available():
|
||||||
@@ -129,7 +136,7 @@ class TFModelTesterMixin:
|
|||||||
|
|
||||||
self.assert_outputs_same(after_outputs, outputs)
|
self.assert_outputs_same(after_outputs, outputs)
|
||||||
|
|
||||||
@slow
|
@tooslow
|
||||||
def test_graph_mode(self):
|
def test_graph_mode(self):
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
@@ -143,7 +150,7 @@ class TFModelTesterMixin:
|
|||||||
outputs = run_in_graph_mode()
|
outputs = run_in_graph_mode()
|
||||||
self.assertIsNotNone(outputs)
|
self.assertIsNotNone(outputs)
|
||||||
|
|
||||||
@slow
|
@tooslow
|
||||||
def test_xla_mode(self):
|
def test_xla_mode(self):
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
for model_class in self.all_model_classes:
|
for model_class in self.all_model_classes:
|
||||||
@@ -184,7 +191,7 @@ class TFModelTesterMixin:
|
|||||||
expected_arg_names = ["input_ids"]
|
expected_arg_names = ["input_ids"]
|
||||||
self.assertListEqual(arg_names[:1], expected_arg_names)
|
self.assertListEqual(arg_names[:1], expected_arg_names)
|
||||||
|
|
||||||
@slow
|
@tooslow
|
||||||
def test_saved_model_creation(self):
|
def test_saved_model_creation(self):
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
config.output_hidden_states = False
|
config.output_hidden_states = False
|
||||||
@@ -205,7 +212,7 @@ class TFModelTesterMixin:
|
|||||||
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
|
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
|
||||||
self.assertTrue(os.path.exists(saved_model_dir))
|
self.assertTrue(os.path.exists(saved_model_dir))
|
||||||
|
|
||||||
@slow
|
@tooslow
|
||||||
def test_saved_model_creation_extended(self):
|
def test_saved_model_creation_extended(self):
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
config.output_hidden_states = True
|
config.output_hidden_states = True
|
||||||
@@ -314,7 +321,7 @@ class TFModelTesterMixin:
|
|||||||
|
|
||||||
onnxruntime.InferenceSession(onnx_model.SerializeToString())
|
onnxruntime.InferenceSession(onnx_model.SerializeToString())
|
||||||
|
|
||||||
@slow
|
@tooslow
|
||||||
def test_mixed_precision(self):
|
def test_mixed_precision(self):
|
||||||
tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
|
tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
|
||||||
|
|
||||||
@@ -488,7 +495,7 @@ class TFModelTesterMixin:
|
|||||||
max_diff = np.amax(np.abs(tfo - pto))
|
max_diff = np.amax(np.abs(tfo - pto))
|
||||||
self.assertLessEqual(max_diff, 4e-2)
|
self.assertLessEqual(max_diff, 4e-2)
|
||||||
|
|
||||||
@slow
|
@tooslow
|
||||||
def test_train_pipeline_custom_model(self):
|
def test_train_pipeline_custom_model(self):
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
# head_mask and decoder_head_mask has different shapes than other input args
|
# head_mask and decoder_head_mask has different shapes than other input args
|
||||||
@@ -909,7 +916,7 @@ class TFModelTesterMixin:
|
|||||||
|
|
||||||
model(inputs)
|
model(inputs)
|
||||||
|
|
||||||
@slow
|
@tooslow
|
||||||
def test_graph_mode_with_inputs_embeds(self):
|
def test_graph_mode_with_inputs_embeds(self):
|
||||||
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
|
||||||
|
|
||||||
|
|||||||
185
utils/notification_service.py
Normal file
185
utils/notification_service.py
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
# Copyright 2020 The HuggingFace Team. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from slack_sdk import WebClient
|
||||||
|
|
||||||
|
|
||||||
|
def handle_test_results(test_results):
    """
    Extract the failure count, success count and time spent from a test summary line.

    The summary is expected to look like ``"== 1 failed, 2 passed in 75.00s (0:01:15) =="``; the surrounding
    ``=`` signs may be absent when the line is long.

    Args:
        test_results (str): Raw text of the summary line (a trailing newline is tolerated).

    Returns:
        tuple: ``(failed, success, time_spent)`` where ``failed`` and ``success`` are ints and ``time_spent`` is
        the last token of the line that is not ``=`` decoration (an empty string for empty input).

    Raises:
        ValueError: If the token preceding ``failed``/``passed`` is not an integer.
    """
    # Split on any whitespace: splitting on a single space would leave a trailing newline glued to the last
    # token, which then leaks into `time_spent`.
    expressions = test_results.split()

    failed = 0
    success = 0

    if not expressions:
        return failed, success, ""

    # When the output is short enough, the output is surrounded by = signs: "== OUTPUT =="
    # When it is too long, those signs are not present.
    if "=" in expressions[-1] and len(expressions) > 1:
        time_spent = expressions[-2]
    else:
        time_spent = expressions[-1]

    # Walk (count, label) pairs so the first token can never borrow its "count" from the end of the line.
    for count, label in zip(expressions, expressions[1:]):
        # Compare the whole word: a substring test would also count "xfailed"/"xpassed".
        outcome = label.strip(",=")
        if outcome == "failed":
            failed += int(count)
        elif outcome == "passed":
            success += int(count)

    return failed, success, time_spent
|
||||||
|
|
||||||
|
|
||||||
|
def format_for_slack(total_results, results, scheduled: bool):
    """
    Build the Slack Block Kit payload summarizing a CI run.

    Args:
        total_results (dict): Aggregated counts with the keys ``"failed"`` and ``"success"``.
        results (dict): Maps a job name to a dict with the keys ``"failed"``, ``"success"`` and ``"time_spent"``.
        scheduled (bool): Whether the report is for the scheduled workflow (``True``) or the self-push one.

    Returns:
        dict: ``{"blocks": [...]}`` holding a header, a totals section, one entry per job and a footer linking to
        the matching GitHub workflow page.
    """
    # Local import so this function does not depend on a new file-level import.
    import datetime

    print(results)

    if scheduled:
        # Use the date of the run instead of a hard-coded one, which would mislabel every later report.
        today = datetime.date.today()
        title = f"🤗 Results of the scheduled tests, {today:%B} {today.day}, {today.year}."
    else:
        title = "🤗 Self-push results"

    header = {
        "type": "header",
        "text": {
            "type": "plain_text",
            "text": title,
            "emoji": True,
        },
    }

    has_failures = total_results["failed"] > 0

    if has_failures:
        total = {
            "type": "section",
            "fields": [
                {"type": "mrkdwn", "text": f"*Failures:*\n❌ {total_results['failed']} failures."},
                {"type": "mrkdwn", "text": f"*Passed:*\n✅ {total_results['success']} tests passed."},
            ],
        }
    else:
        total = {
            "type": "section",
            "fields": [{"type": "mrkdwn", "text": f"*Congrats!*\nAll {total_results['success']} tests pass."}],
        }

    blocks = [header, total]

    if has_failures:
        # With failures, every job gets its own header plus a results/time section.
        for key, result in results.items():
            print(key, result)
            blocks.append({"type": "header", "text": {"type": "plain_text", "text": key, "emoji": True}})
            blocks.append(
                {
                    "type": "section",
                    "fields": [
                        {
                            "type": "mrkdwn",
                            "text": f"*Results:*\n{result['failed']} failed, {result['success']} passed.",
                        },
                        {"type": "mrkdwn", "text": f"*Time spent:*\n{result['time_spent']}"},
                    ],
                }
            )
    else:
        # Without failures, only the time spent per job is reported.
        for key, result in results.items():
            blocks.append(
                {"type": "section", "fields": [{"type": "mrkdwn", "text": f"*{key}*\n{result['time_spent']}."}]}
            )

    footer = {
        "type": "section",
        "text": {
            "type": "mrkdwn",
            "text": "<https://github.com/huggingface/transformers/actions/workflows/self-scheduled.yml|View on GitHub>"
            if scheduled
            else "<https://github.com/huggingface/transformers/actions/workflows/self-push.yml|View on GitHub>",
        },
    }

    blocks.append(footer)

    blocks = {"blocks": blocks}

    return blocks
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # The first CLI argument selects the report layout. A missing argument is treated as a non-scheduled
    # (self-push) run instead of crashing with an IndexError.
    scheduled = len(sys.argv) > 1 and sys.argv[1] == "scheduled"

    # Each path is a template: "[]" is replaced below by "stats" or "summary_short" to pick the report file.
    if scheduled:
        # The scheduled run has several artifacts for each job.
        file_paths = {
            "TF Single GPU": {
                "common": "run_all_tests_tf_gpu_test_reports/tests_tf_gpu_[].txt",
                "pipeline": "run_all_tests_tf_gpu_test_reports/tests_tf_pipeline_gpu_[].txt",
            },
            "Torch Single GPU": {
                "common": "run_all_tests_torch_gpu_test_reports/tests_torch_gpu_[].txt",
                "pipeline": "run_all_tests_torch_gpu_test_reports/tests_torch_pipeline_gpu_[].txt",
                "examples": "run_all_tests_torch_gpu_test_reports/examples_torch_gpu_[].txt",
            },
            "TF Multi GPU": {
                "common": "run_all_tests_tf_multi_gpu_test_reports/tests_tf_multi_gpu_[].txt",
                "pipeline": "run_all_tests_tf_multi_gpu_test_reports/tests_tf_pipeline_multi_gpu_[].txt",
            },
            "Torch Multi GPU": {
                "common": "run_all_tests_torch_multi_gpu_test_reports/tests_torch_multi_gpu_[].txt",
                "pipeline": "run_all_tests_torch_multi_gpu_test_reports/tests_torch_pipeline_multi_gpu_[].txt",
            },
        }
    else:
        file_paths = {
            "TF Single GPU": {"common": "run_all_tests_tf_gpu_test_reports/tests_tf_gpu_[].txt"},
            "Torch Single GPU": {"common": "run_all_tests_torch_gpu_test_reports/tests_torch_gpu_[].txt"},
            "TF Multi GPU": {"common": "run_all_tests_tf_multi_gpu_test_reports/tests_tf_multi_gpu_[].txt"},
            "Torch Multi GPU": {"common": "run_all_tests_torch_multi_gpu_test_reports/tests_torch_multi_gpu_[].txt"},
        }

    # Both values come from the environment; a missing variable raises KeyError before any report is read.
    client = WebClient(token=os.environ["CI_SLACK_BOT_TOKEN"])
    channel_id = os.environ["CI_SLACK_CHANNEL_ID"]

    try:
        results = {}
        for job, file_dict in file_paths.items():

            # Single return value for failed/success across steps of a same job
            results[job] = {"failed": 0, "success": 0, "time_spent": "", "failures": ""}

            for key, file_path in file_dict.items():
                with open(file_path.replace("[]", "stats")) as f:
                    failed, success, time_spent = handle_test_results(f.read())
                    results[job]["failed"] += failed
                    results[job]["success"] += success
                    # Drops the first and last characters of the token
                    # (presumably the parentheses around the duration; TODO confirm against the report format).
                    results[job]["time_spent"] += time_spent[1:-1] + ", "
                with open(file_path.replace("[]", "summary_short")) as f:
                    # Keep only the lines mentioning FAILED; they are posted in the thread below.
                    for line in f:
                        if re.search("FAILED", line):
                            results[job]["failures"] += line

            # Remove the trailing ", "
            results[job]["time_spent"] = results[job]["time_spent"][:-2]

        test_results_keys = ["failed", "success"]
        total = {"failed": 0, "success": 0}
        for job, job_result in results.items():
            for result_key in test_results_keys:
                total[result_key] += job_result[result_key]

        to_be_sent_to_slack = format_for_slack(total, results, scheduled)

        result = client.chat_postMessage(
            channel=channel_id,
            blocks=to_be_sent_to_slack["blocks"],
        )

        # Post the failing test lines of each job as replies in the thread of the summary message.
        for job, job_result in results.items():
            if len(job_result["failures"]):
                client.chat_postMessage(
                    channel=channel_id, text=f"{job}\n{job_result['failures']}", thread_ts=result["ts"]
                )

    except Exception as e:
        # Deliberately catch every exception and re-raise it with a hint about missing artifacts.
        # Nothing is posted to Slack on this path; the original error stays attached as the cause.
        raise Exception(f"Setup error: no artifacts were found. Error: {e}") from e
|
||||||
Reference in New Issue
Block a user