Comment fast GPU TF tests (#12452)
This commit is contained in:
168
.github/workflows/self-push.yml
vendored
168
.github/workflows/self-push.yml
vendored
@@ -61,47 +61,47 @@ jobs:
|
|||||||
name: run_all_tests_torch_gpu_test_reports
|
name: run_all_tests_torch_gpu_test_reports
|
||||||
path: reports
|
path: reports
|
||||||
|
|
||||||
run_tests_tf_gpu:
|
# run_tests_tf_gpu:
|
||||||
runs-on: [self-hosted, docker-gpu, single-gpu]
|
# runs-on: [self-hosted, docker-gpu, single-gpu]
|
||||||
timeout-minutes: 120
|
# timeout-minutes: 120
|
||||||
container:
|
# container:
|
||||||
image: tensorflow/tensorflow:2.4.1-gpu
|
# image: tensorflow/tensorflow:2.4.1-gpu
|
||||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
# options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
# steps:
|
||||||
- name: Launcher docker
|
# - name: Launcher docker
|
||||||
uses: actions/checkout@v2
|
# uses: actions/checkout@v2
|
||||||
|
#
|
||||||
- name: NVIDIA-SMI
|
# - name: NVIDIA-SMI
|
||||||
run: |
|
# run: |
|
||||||
nvidia-smi
|
# nvidia-smi
|
||||||
|
#
|
||||||
- name: Install dependencies
|
# - name: Install dependencies
|
||||||
run: |
|
# run: |
|
||||||
pip install --upgrade pip
|
# pip install --upgrade pip
|
||||||
pip install .[sklearn,testing,onnxruntime,sentencepiece]
|
# pip install .[sklearn,testing,onnxruntime,sentencepiece]
|
||||||
|
#
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
# - name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
# run: |
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
# TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
# TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
||||||
|
#
|
||||||
- name: Run all non-slow tests on GPU
|
# - name: Run all non-slow tests on GPU
|
||||||
env:
|
# env:
|
||||||
TF_NUM_INTRAOP_THREADS: 8
|
# TF_NUM_INTRAOP_THREADS: 8
|
||||||
TF_NUM_INTEROP_THREADS: 1
|
# TF_NUM_INTEROP_THREADS: 1
|
||||||
run: |
|
# run: |
|
||||||
python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests
|
# python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests
|
||||||
|
#
|
||||||
- name: Failure short reports
|
# - name: Failure short reports
|
||||||
if: ${{ always() }}
|
# if: ${{ always() }}
|
||||||
run: cat reports/tests_tf_gpu_failures_short.txt
|
# run: cat reports/tests_tf_gpu_failures_short.txt
|
||||||
|
#
|
||||||
- name: Test suite reports artifacts
|
# - name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
# if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
# uses: actions/upload-artifact@v2
|
||||||
with:
|
# with:
|
||||||
name: run_all_tests_tf_gpu_test_reports
|
# name: run_all_tests_tf_gpu_test_reports
|
||||||
path: reports
|
# path: reports
|
||||||
|
|
||||||
|
|
||||||
run_tests_torch_multi_gpu:
|
run_tests_torch_multi_gpu:
|
||||||
@@ -147,47 +147,47 @@ jobs:
|
|||||||
name: run_all_tests_torch_multi_gpu_test_reports
|
name: run_all_tests_torch_multi_gpu_test_reports
|
||||||
path: reports
|
path: reports
|
||||||
|
|
||||||
run_tests_tf_multi_gpu:
|
# run_tests_tf_multi_gpu:
|
||||||
runs-on: [self-hosted, docker-gpu, multi-gpu]
|
# runs-on: [self-hosted, docker-gpu, multi-gpu]
|
||||||
timeout-minutes: 120
|
# timeout-minutes: 120
|
||||||
container:
|
# container:
|
||||||
image: tensorflow/tensorflow:2.4.1-gpu
|
# image: tensorflow/tensorflow:2.4.1-gpu
|
||||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
# options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
steps:
|
# steps:
|
||||||
- name: Launcher docker
|
# - name: Launcher docker
|
||||||
uses: actions/checkout@v2
|
# uses: actions/checkout@v2
|
||||||
|
#
|
||||||
- name: NVIDIA-SMI
|
# - name: NVIDIA-SMI
|
||||||
run: |
|
# run: |
|
||||||
nvidia-smi
|
# nvidia-smi
|
||||||
|
#
|
||||||
- name: Install dependencies
|
# - name: Install dependencies
|
||||||
run: |
|
# run: |
|
||||||
pip install --upgrade pip
|
# pip install --upgrade pip
|
||||||
pip install .[sklearn,testing,onnxruntime,sentencepiece]
|
# pip install .[sklearn,testing,onnxruntime,sentencepiece]
|
||||||
|
#
|
||||||
- name: Are GPUs recognized by our DL frameworks
|
# - name: Are GPUs recognized by our DL frameworks
|
||||||
run: |
|
# run: |
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
# TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
|
||||||
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
# TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
|
||||||
|
#
|
||||||
- name: Run all non-slow tests on GPU
|
# - name: Run all non-slow tests on GPU
|
||||||
env:
|
# env:
|
||||||
TF_NUM_INTRAOP_THREADS: 8
|
# TF_NUM_INTRAOP_THREADS: 8
|
||||||
TF_NUM_INTEROP_THREADS: 1
|
# TF_NUM_INTEROP_THREADS: 1
|
||||||
run: |
|
# run: |
|
||||||
python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
|
# python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
|
||||||
|
#
|
||||||
- name: Failure short reports
|
# - name: Failure short reports
|
||||||
if: ${{ always() }}
|
# if: ${{ always() }}
|
||||||
run: cat reports/tests_tf_multi_gpu_failures_short.txt
|
# run: cat reports/tests_tf_multi_gpu_failures_short.txt
|
||||||
|
#
|
||||||
- name: Test suite reports artifacts
|
# - name: Test suite reports artifacts
|
||||||
if: ${{ always() }}
|
# if: ${{ always() }}
|
||||||
uses: actions/upload-artifact@v2
|
# uses: actions/upload-artifact@v2
|
||||||
with:
|
# with:
|
||||||
name: run_all_tests_tf_multi_gpu_test_reports
|
# name: run_all_tests_tf_multi_gpu_test_reports
|
||||||
path: reports
|
# path: reports
|
||||||
|
|
||||||
run_tests_torch_cuda_extensions_gpu:
|
run_tests_torch_cuda_extensions_gpu:
|
||||||
runs-on: [self-hosted, docker-gpu, single-gpu]
|
runs-on: [self-hosted, docker-gpu, single-gpu]
|
||||||
@@ -278,9 +278,9 @@ jobs:
|
|||||||
if: always()
|
if: always()
|
||||||
needs: [
|
needs: [
|
||||||
run_tests_torch_gpu,
|
run_tests_torch_gpu,
|
||||||
run_tests_tf_gpu,
|
# run_tests_tf_gpu,
|
||||||
run_tests_torch_multi_gpu,
|
run_tests_torch_multi_gpu,
|
||||||
run_tests_tf_multi_gpu,
|
# run_tests_tf_multi_gpu,
|
||||||
run_tests_torch_cuda_extensions_gpu,
|
run_tests_torch_cuda_extensions_gpu,
|
||||||
run_tests_torch_cuda_extensions_multi_gpu
|
run_tests_torch_cuda_extensions_multi_gpu
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user