Multi-GPU setup (#7453)
This commit is contained in:
52
.github/workflows/self-push.yml
vendored
52
.github/workflows/self-push.yml
vendored
@@ -14,7 +14,7 @@ on:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_tests_torch_and_tf_gpu:
|
run_tests_torch_and_tf_gpu:
|
||||||
runs-on: self-hosted
|
runs-on: [self-hosted, single-gpu]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- name: Python version
|
- name: Python version
|
||||||
@@ -62,3 +62,53 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
source .env/bin/activate
|
source .env/bin/activate
|
||||||
python -m pytest -n 2 --dist=loadfile -s ./tests/
|
python -m pytest -n 2 --dist=loadfile -s ./tests/
|
||||||
|
|
||||||
|
run_tests_torch_and_tf_multiple_gpu:
|
||||||
|
runs-on: [self-hosted, multi-gpu]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: Python version
|
||||||
|
run: |
|
||||||
|
which python
|
||||||
|
python --version
|
||||||
|
pip --version
|
||||||
|
- name: Current dir
|
||||||
|
run: pwd
|
||||||
|
- run: nvidia-smi
|
||||||
|
|
||||||
|
- name: Loading cache.
|
||||||
|
uses: actions/cache@v2
|
||||||
|
id: cache
|
||||||
|
with:
|
||||||
|
path: .env
|
||||||
|
key: v0-tests_tf_torch_multiple_gpu-${{ hashFiles('setup.py') }}
|
||||||
|
|
||||||
|
- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
|
||||||
|
run: |
|
||||||
|
python -m venv .env
|
||||||
|
source .env/bin/activate
|
||||||
|
which python
|
||||||
|
python --version
|
||||||
|
pip --version
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
source .env/bin/activate
|
||||||
|
pip install --upgrade pip
|
||||||
|
pip install torch!=1.6.0
|
||||||
|
pip install .[sklearn,testing,onnxruntime]
|
||||||
|
pip install git+https://github.com/huggingface/datasets
|
||||||
|
|
||||||
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
|
run: |
|
||||||
|
source .env/bin/activate
|
||||||
|
python -c "import torch; print(torch.cuda.is_available())"
|
||||||
|
|
||||||
|
- name: Run all non-slow tests on GPU
|
||||||
|
env:
|
||||||
|
TF_FORCE_GPU_ALLOW_GROWTH: "true"
|
||||||
|
# TF_GPU_MEMORY_LIMIT: 4096
|
||||||
|
OMP_NUM_THREADS: 1
|
||||||
|
USE_CUDA: yes
|
||||||
|
run: |
|
||||||
|
source .env/bin/activate
|
||||||
|
python -m pytest -n 2 --dist=loadfile -s ./tests/
|
||||||
|
|||||||
64
.github/workflows/self-scheduled.yml
vendored
64
.github/workflows/self-scheduled.yml
vendored
@@ -10,7 +10,7 @@ on:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_all_tests_torch_and_tf_gpu:
|
run_all_tests_torch_and_tf_gpu:
|
||||||
runs-on: self-hosted
|
runs-on: [self-hosted, single-gpu]
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
@@ -70,3 +70,65 @@ jobs:
|
|||||||
source .env/bin/activate
|
source .env/bin/activate
|
||||||
pip install -r examples/requirements.txt
|
pip install -r examples/requirements.txt
|
||||||
python -m pytest -n 1 --dist=loadfile -s examples
|
python -m pytest -n 1 --dist=loadfile -s examples
|
||||||
|
|
||||||
|
run_all_tests_torch_and_tf_multiple_gpu:
|
||||||
|
runs-on: [self-hosted, multi-gpu]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Loading cache.
|
||||||
|
uses: actions/cache@v2
|
||||||
|
id: cache
|
||||||
|
with:
|
||||||
|
path: .env
|
||||||
|
key: v0-slow_tests_tf_torch_multi_gpu-${{ hashFiles('setup.py') }}
|
||||||
|
|
||||||
|
- name: Python version
|
||||||
|
run: |
|
||||||
|
which python
|
||||||
|
python --version
|
||||||
|
pip --version
|
||||||
|
- name: Current dir
|
||||||
|
run: pwd
|
||||||
|
- run: nvidia-smi
|
||||||
|
- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
|
||||||
|
if: steps.cache.outputs.cache-hit != 'true'
|
||||||
|
run: |
|
||||||
|
python -m venv .env
|
||||||
|
source .env/bin/activate
|
||||||
|
which python
|
||||||
|
python --version
|
||||||
|
pip --version
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
source .env/bin/activate
|
||||||
|
pip install --upgrade pip
|
||||||
|
pip install torch!=1.6.0
|
||||||
|
pip install .[sklearn,testing,onnxruntime]
|
||||||
|
pip install git+https://github.com/huggingface/datasets
|
||||||
|
|
||||||
|
- name: Are GPUs recognized by our DL frameworks
|
||||||
|
run: |
|
||||||
|
source .env/bin/activate
|
||||||
|
python -c "import torch; print(torch.cuda.is_available())"
|
||||||
|
|
||||||
|
- name: Run all tests on GPU
|
||||||
|
env:
|
||||||
|
TF_FORCE_GPU_ALLOW_GROWTH: "true"
|
||||||
|
OMP_NUM_THREADS: 1
|
||||||
|
RUN_SLOW: yes
|
||||||
|
USE_CUDA: yes
|
||||||
|
run: |
|
||||||
|
source .env/bin/activate
|
||||||
|
python -m pytest -n 1 --dist=loadfile -s ./tests/
|
||||||
|
|
||||||
|
- name: Run examples tests on GPU
|
||||||
|
env:
|
||||||
|
TF_FORCE_GPU_ALLOW_GROWTH: "true"
|
||||||
|
OMP_NUM_THREADS: 1
|
||||||
|
RUN_SLOW: yes
|
||||||
|
USE_CUDA: yes
|
||||||
|
run: |
|
||||||
|
source .env/bin/activate
|
||||||
|
pip install -r examples/requirements.txt
|
||||||
|
python -m pytest -n 1 --dist=loadfile -s examples
|
||||||
|
|||||||
Reference in New Issue
Block a user