From e36bd94345af6045108a391f9ac7f4dc557548de Mon Sep 17 00:00:00 2001
From: Julien Chaumond
Date: Fri, 28 Feb 2020 21:11:08 -0500
Subject: [PATCH] [ci] Run all tests on (self-hosted) GPU (#3020)

* Create self-hosted.yml

* Update self-hosted.yml

* Update self-hosted.yml

* Update self-hosted.yml

* Update self-hosted.yml

* Update self-hosted.yml

* do not run slow tests, for now

* [ci] For comparison with circleci, let's also run CPU-tests

* [ci] reorganize

* clearer filenames

* [ci] Final tweaks before merging

* rm slow tests on circle ci

* Trigger CI

* On GPU this concurrency was way too high
---
 .circleci/config.yml                 | 16 ---------
 .github/workflows/github-push.yml    | 19 +++++++++++
 .github/workflows/self-push.yml      | 47 ++++++++++++++++++++++++++
 .github/workflows/self-scheduled.yml | 50 ++++++++++++++++++++++++++++
 4 files changed, 116 insertions(+), 16 deletions(-)
 create mode 100644 .github/workflows/github-push.yml
 create mode 100644 .github/workflows/self-push.yml
 create mode 100644 .github/workflows/self-scheduled.yml

diff --git a/.circleci/config.yml b/.circleci/config.yml
index c80430ea68..2ee83e6701 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -14,22 +14,6 @@ jobs:
             - run: sudo pip install codecov pytest-cov
             - run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
             - run: codecov
-    run_all_tests_torch_and_tf:
-        working_directory: ~/transformers
-        docker:
-            - image: circleci/python:3.5
-        environment:
-            OMP_NUM_THREADS: 1
-            RUN_SLOW: yes
-            RUN_CUSTOM_TOKENIZERS: yes
-        resource_class: xlarge
-        parallelism: 1
-        steps:
-            - checkout
-            - run: sudo pip install .[mecab,sklearn,tf-cpu,torch,testing]
-            - run:
-                command: python -m pytest -n 8 --dist=loadfile -s -v ./tests/
-                no_output_timeout: 4h
 
     run_tests_torch:
         working_directory: ~/transformers
diff --git a/.github/workflows/github-push.yml b/.github/workflows/github-push.yml
new file mode 100644
index 0000000000..59d3dc5158
--- /dev/null
+++ b/.github/workflows/github-push.yml
@@ -0,0 +1,19 @@
+name: GitHub-hosted runner
+
+on: push
+
+jobs:
+  check_code_quality:
+    runs-on: ubuntu-18.04
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v1
+      with:
+        python-version: 3.7
+    - name: Install dependencies
+      run: |
+        pip install .[tf,torch,quality]
+
+
+
diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml
new file mode 100644
index 0000000000..cfbe999699
--- /dev/null
+++ b/.github/workflows/self-push.yml
@@ -0,0 +1,47 @@
+name: Self-hosted runner (push)
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+
+jobs:
+  run_tests_torch_and_tf_gpu:
+    runs-on: self-hosted
+    steps:
+    - uses: actions/checkout@v2
+    - name: Python version
+      run: |
+        which python
+        python --version
+        pip --version
+    - name: Current dir
+      run: pwd
+    - run: nvidia-smi
+    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+      run: |
+        python -m venv .env
+        source .env/bin/activate
+        which python
+        python --version
+        pip --version
+    - name: Install dependencies
+      run: |
+        source .env/bin/activate
+        pip install .[sklearn,tf,torch,testing]
+
+    - name: Are GPUs recognized by our DL frameworks
+      run: |
+        source .env/bin/activate
+        python -c "import torch; print(torch.cuda.is_available())"
+        python -c "import tensorflow as tf; print(tf.test.is_built_with_cuda(), tf.config.list_physical_devices('GPU'))"
+
+    - name: Run all non-slow tests on GPU
+      env:
+        OMP_NUM_THREADS: 1
+        USE_CUDA: yes
+      run: |
+        source .env/bin/activate
+        python -m pytest -n 2 --dist=loadfile -s -v ./tests/
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
new file mode 100644
index 0000000000..7c33d5dfcb
--- /dev/null
+++ b/.github/workflows/self-scheduled.yml
@@ -0,0 +1,50 @@
+name: Self-hosted runner (scheduled)
+
+on:
+  push:
+    branches:
+      - ci_*
+  repository_dispatch:
+  schedule:
+    - cron: "0 0 * * *"
+
+jobs:
+  run_all_tests_torch_and_tf_gpu:
+    runs-on: self-hosted
+    steps:
+    - uses: actions/checkout@v2
+    - name: Python version
+      run: |
+        which python
+        python --version
+        pip --version
+    - name: Current dir
+      run: pwd
+    - run: nvidia-smi
+    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+      run: |
+        python -m venv .env
+        source .env/bin/activate
+        which python
+        python --version
+        pip --version
+    - name: Install dependencies
+      run: |
+        source .env/bin/activate
+        pip install .[sklearn,tf,torch,testing]
+
+    - name: Are GPUs recognized by our DL frameworks
+      run: |
+        source .env/bin/activate
+        python -c "import torch; print(torch.cuda.is_available())"
+        python -c "import tensorflow as tf; print(tf.test.is_built_with_cuda(), tf.config.list_physical_devices('GPU'))"
+
+    - name: Run all tests on GPU
+      env:
+        OMP_NUM_THREADS: 1
+        RUN_SLOW: yes
+        USE_CUDA: yes
+      run: |
+        source .env/bin/activate
+        python -m pytest -n 1 --dist=loadfile -s -v ./tests/
+