[Testing] Add Flax Tests on GPU, Add Speech and Vision to Flax & TF tests (#13313)
* up * finish * Apply suggestions from code review * apply Lysandre's suggestions * adapt CircleCI as well * finish * Update setup.py
This commit is contained in:
committed by
GitHub
parent
8b2de0e483
commit
062300ba7f
118
.github/workflows/self-push.yml
vendored
118
.github/workflows/self-push.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
|
||||
apt install -y libsndfile1-dev
|
||||
pip install --upgrade pip
|
||||
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,vision,timm]
|
||||
pip install .[sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]
|
||||
|
||||
- name: Launcher docker
|
||||
uses: actions/checkout@v2
|
||||
@@ -78,6 +78,61 @@ jobs:
|
||||
name: run_all_tests_torch_gpu_test_reports
|
||||
path: reports
|
||||
|
||||
run_tests_flax_gpu:
|
||||
runs-on: [self-hosted, docker-gpu, single-gpu]
|
||||
container:
|
||||
image: tensorflow/tensorflow:2.4.1-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
|
||||
pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
|
||||
pip install --upgrade pip
|
||||
pip install .[sklearn,testing,sentencepiece,flax,flax-speech,vision]
|
||||
|
||||
- name: Launcher docker
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Are GPUs recognized by our DL frameworks
|
||||
run: |
|
||||
python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
|
||||
python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
|
||||
|
||||
# - name: Fetch the tests to run
|
||||
# run: |
|
||||
# python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
||||
|
||||
- name: Report fetched tests
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: test_fetched
|
||||
path: test_preparation.txt
|
||||
|
||||
- name: Run all non-slow tests on GPU
|
||||
run: |
|
||||
python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_gpu
|
||||
# if [ -f test_list.txt ]; then
|
||||
# python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_gpu $(cat test_list.txt)
|
||||
# fi
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
run: cat reports/tests_flax_gpu_failures_short.txt
|
||||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: run_all_tests_flax_gpu_test_reports
|
||||
path: reports
|
||||
|
||||
# run_tests_tf_gpu:
|
||||
# runs-on: [self-hosted, docker-gpu, single-gpu]
|
||||
# timeout-minutes: 120
|
||||
@@ -89,7 +144,7 @@ jobs:
|
||||
# run: |
|
||||
# apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
|
||||
# pip install --upgrade pip
|
||||
# pip install .[sklearn,testing,onnxruntime,sentencepiece]
|
||||
# pip install .[sklearn,testing,onnxruntime,sentencepiece,tf-speech]
|
||||
#
|
||||
# - name: Launcher docker
|
||||
# uses: actions/checkout@v2
|
||||
@@ -147,7 +202,7 @@ jobs:
|
||||
apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
|
||||
apt install -y libsndfile1-dev
|
||||
pip install --upgrade pip
|
||||
pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,vision,timm]
|
||||
pip install .[sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]
|
||||
|
||||
- name: Launcher docker
|
||||
uses: actions/checkout@v2
|
||||
@@ -195,6 +250,61 @@ jobs:
|
||||
name: run_all_tests_torch_multi_gpu_test_reports
|
||||
path: reports
|
||||
|
||||
run_tests_flax_multi_gpu:
|
||||
runs-on: [self-hosted, docker-gpu, multi-gpu]
|
||||
container:
|
||||
image: tensorflow/tensorflow:2.4.1-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
|
||||
pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
|
||||
pip install --upgrade pip
|
||||
pip install .[sklearn,testing,sentencepiece,flax,flax-speech,vision]
|
||||
|
||||
- name: Launcher docker
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 2
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Are GPUs recognized by our DL frameworks
|
||||
run: |
|
||||
python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
|
||||
python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
|
||||
|
||||
# - name: Fetch the tests to run
|
||||
# run: |
|
||||
# python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
|
||||
|
||||
- name: Report fetched tests
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: test_fetched
|
||||
path: test_preparation.txt
|
||||
|
||||
- name: Run all non-slow tests on GPU
|
||||
run: |
|
||||
python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_multi_gpu
|
||||
# if [ -f test_list.txt ]; then
|
||||
# python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_multi_gpu $(cat test_list.txt)
|
||||
# fi
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
run: cat reports/tests_flax_multi_gpu_failures_short.txt
|
||||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: run_all_tests_flax_multi_gpu_test_reports
|
||||
path: reports
|
||||
|
||||
# run_tests_tf_multi_gpu:
|
||||
# runs-on: [self-hosted, docker-gpu, multi-gpu]
|
||||
# timeout-minutes: 120
|
||||
@@ -206,7 +316,7 @@ jobs:
|
||||
# run: |
|
||||
# apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
|
||||
# pip install --upgrade pip
|
||||
# pip install .[sklearn,testing,onnxruntime,sentencepiece]
|
||||
# pip install .[sklearn,testing,onnxruntime,sentencepiece,tf-speech]
|
||||
#
|
||||
# - name: Launcher docker
|
||||
# uses: actions/checkout@v2
|
||||
|
||||
86
.github/workflows/self-scheduled.yml
vendored
86
.github/workflows/self-scheduled.yml
vendored
@@ -34,7 +34,7 @@ jobs:
|
||||
run: |
|
||||
apt -y update && apt install -y libsndfile1-dev git
|
||||
pip install --upgrade pip
|
||||
pip install .[integrations,sklearn,testing,onnxruntime,sentencepiece,speech,vision,timm]
|
||||
pip install .[integrations,sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]
|
||||
|
||||
- name: Are GPUs recognized by our DL frameworks
|
||||
run: |
|
||||
@@ -85,6 +85,45 @@ jobs:
|
||||
name: run_all_tests_torch_gpu_test_reports
|
||||
path: reports
|
||||
|
||||
run_all_tests_flax_gpu:
|
||||
runs-on: [self-hosted, docker-gpu, single-gpu]
|
||||
container:
|
||||
image: tensorflow/tensorflow:2.4.1-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Launcher docker
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install --upgrade pip
|
||||
pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
|
||||
pip install .[flax,integrations,sklearn,testing,sentencepiece,flax-speech,vision]
|
||||
|
||||
- name: Are GPUs recognized by our DL frameworks
|
||||
run: |
|
||||
python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
|
||||
python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
|
||||
|
||||
- name: Run all tests on GPU
|
||||
run: |
|
||||
python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_flax_gpu tests
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ always() }}
|
||||
run: cat reports/tests_flax_gpu_failures_short.txt
|
||||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: run_all_tests_flax_gpu_test_reports
|
||||
path: reports
|
||||
|
||||
run_all_tests_tf_gpu:
|
||||
runs-on: [self-hosted, docker-gpu, single-gpu]
|
||||
container:
|
||||
@@ -102,7 +141,7 @@ jobs:
|
||||
run: |
|
||||
apt -y update && apt install -y git
|
||||
pip install --upgrade pip
|
||||
pip install .[sklearn,testing,onnx,sentencepiece]
|
||||
pip install .[sklearn,testing,onnx,sentencepiece,tf-speech]
|
||||
|
||||
- name: Are GPUs recognized by our DL frameworks
|
||||
run: |
|
||||
@@ -158,7 +197,7 @@ jobs:
|
||||
run: |
|
||||
apt -y update && apt install -y libsndfile1-dev git
|
||||
pip install --upgrade pip
|
||||
pip install .[integrations,sklearn,testing,onnxruntime,sentencepiece,speech,vision,timm]
|
||||
pip install .[integrations,sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]
|
||||
|
||||
- name: Are GPUs recognized by our DL frameworks
|
||||
run: |
|
||||
@@ -213,7 +252,7 @@ jobs:
|
||||
run: |
|
||||
apt -y update && apt install -y git
|
||||
pip install --upgrade pip
|
||||
pip install .[sklearn,testing,onnx,sentencepiece]
|
||||
pip install .[sklearn,testing,onnx,sentencepiece,tf-speech]
|
||||
|
||||
- name: Are GPUs recognized by our DL frameworks
|
||||
run: |
|
||||
@@ -251,6 +290,45 @@ jobs:
|
||||
name: run_all_tests_tf_multi_gpu_test_reports
|
||||
path: reports
|
||||
|
||||
run_all_tests_flax_multi_gpu:
|
||||
runs-on: [self-hosted, docker-gpu, multi-gpu]
|
||||
container:
|
||||
image: tensorflow/tensorflow:2.4.1-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Launcher docker
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install --upgrade pip
|
||||
pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
|
||||
pip install .[flax,integrations,sklearn,testing,sentencepiece,flax-speech,vision]
|
||||
|
||||
- name: Are GPUs recognized by our DL frameworks
|
||||
run: |
|
||||
python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
|
||||
python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
|
||||
|
||||
- name: Run all tests on GPU
|
||||
run: |
|
||||
python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_flax_gpu tests
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ always() }}
|
||||
run: cat reports/tests_flax_gpu_failures_short.txt
|
||||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: run_all_tests_flax_gpu_test_reports
|
||||
path: reports
|
||||
|
||||
run_all_tests_torch_cuda_extensions_gpu:
|
||||
runs-on: [self-hosted, docker-gpu, single-gpu]
|
||||
container:
|
||||
|
||||
Reference in New Issue
Block a user