Reduce to 1 worker and set timeout for GPU TF tests (#11633)

This commit is contained in:
Lysandre Debut
2021-05-07 17:55:20 +02:00
committed by GitHub
parent 39084ca663
commit da37eb8e43

View File

@@ -63,6 +63,7 @@ jobs:
   run_tests_tf_gpu:
     runs-on: [self-hosted, docker-gpu, single-gpu]
+    timeout-minutes: 120
     container:
       image: tensorflow/tensorflow:2.4.1-gpu
       options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -89,7 +90,7 @@ jobs:
           TF_NUM_INTRAOP_THREADS: 8
           TF_NUM_INTEROP_THREADS: 1
         run: |
-          python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_gpu tests
+          python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_gpu tests
       - name: Failure short reports
         if: ${{ always() }}
@@ -148,6 +149,7 @@ jobs:
   run_tests_tf_multi_gpu:
     runs-on: [self-hosted, docker-gpu, multi-gpu]
+    timeout-minutes: 120
     container:
       image: tensorflow/tensorflow:2.4.1-gpu
       options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -174,7 +176,7 @@ jobs:
           TF_NUM_INTRAOP_THREADS: 8
           TF_NUM_INTEROP_THREADS: 1
         run: |
-          python -m pytest -n 2 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
+          python -m pytest -n 1 --dist=loadfile --make-reports=tests_tf_multi_gpu tests
       - name: Failure short reports
         if: ${{ always() }}