From e36bd94345af6045108a391f9ac7f4dc557548de Mon Sep 17 00:00:00 2001
From: Julien Chaumond <chaumond@gmail.com>
Date: Fri, 28 Feb 2020 21:11:08 -0500
Subject: [PATCH] [ci] Run all tests on (self-hosted) GPU (#3020)

* Create self-hosted.yml

* Update self-hosted.yml

* Update self-hosted.yml

* Update self-hosted.yml

* Update self-hosted.yml

* Update self-hosted.yml

* do not run slow tests, for now

* [ci] For comparison with circleci, let's also run CPU-tests

* [ci] reorganize

* clearer filenames

* [ci] Final tweaks before merging

* rm slow tests on circle ci

* Trigger CI

* On GPU this concurrency was way too high
---
 .circleci/config.yml                 | 16 ---------
 .github/workflows/github-push.yml    | 19 +++++++++++
 .github/workflows/self-push.yml      | 47 ++++++++++++++++++++++++++
 .github/workflows/self-scheduled.yml | 50 ++++++++++++++++++++++++++++
 4 files changed, 116 insertions(+), 16 deletions(-)
 create mode 100644 .github/workflows/github-push.yml
 create mode 100644 .github/workflows/self-push.yml
 create mode 100644 .github/workflows/self-scheduled.yml

diff --git a/.circleci/config.yml b/.circleci/config.yml
index c80430ea68..2ee83e6701 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -14,22 +14,6 @@ jobs:
             - run: sudo pip install codecov pytest-cov
             - run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
             - run: codecov
-    run_all_tests_torch_and_tf:
-        working_directory: ~/transformers
-        docker:
-            - image: circleci/python:3.5
-        environment:
-            OMP_NUM_THREADS: 1
-            RUN_SLOW: yes
-            RUN_CUSTOM_TOKENIZERS: yes
-        resource_class: xlarge
-        parallelism: 1
-        steps:
-            - checkout
-            - run: sudo pip install .[mecab,sklearn,tf-cpu,torch,testing]
-            - run:
-                command: python -m pytest -n 8 --dist=loadfile -s -v ./tests/
-                no_output_timeout: 4h
 
     run_tests_torch:
         working_directory: ~/transformers
diff --git a/.github/workflows/github-push.yml b/.github/workflows/github-push.yml
new file mode 100644
index 0000000000..59d3dc5158
--- /dev/null
+++ b/.github/workflows/github-push.yml
@@ -0,0 +1,19 @@
+# Workflow executed on a GitHub-hosted runner for every push.
+name: GitHub-hosted runner
+
+on: push
+
+jobs:
+  # NOTE(review): this job currently only installs the `quality` extra;
+  # no lint/format command is run yet.
+  check_code_quality:
+    runs-on: ubuntu-18.04
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v1
+      with:
+        python-version: "3.7"  # quoted so the version is never read as a float
+    - name: Install dependencies
+      run: |
+        pip install .[tf,torch,quality]
diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml
new file mode 100644
index 0000000000..cfbe999699
--- /dev/null
+++ b/.github/workflows/self-push.yml
@@ -0,0 +1,47 @@
+# GPU run of the non-slow tests on the self-hosted runner, on pushes to master.
+name: Self-hosted runner (push)
+
+on:
+  push:
+    branches:
+      - master
+  # No `pull_request` trigger: fork PRs must not run code on our own machine.
+
+jobs:
+  run_tests_torch_and_tf_gpu:
+    runs-on: self-hosted
+    steps:
+    - uses: actions/checkout@v2
+    - name: Python version
+      run: |
+        which python
+        python --version
+        pip --version
+    - name: Current dir
+      run: pwd
+    - run: nvidia-smi
+    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+      run: |
+        python -m venv .env
+        source .env/bin/activate
+        which python
+        python --version
+        pip --version
+    - name: Install dependencies
+      run: |
+        source .env/bin/activate
+        pip install .[sklearn,tf,torch,testing]
+    # Diagnostic only: GPU visibility is printed, not asserted.
+    - name: Are GPUs recognized by our DL frameworks
+      run: |
+        source .env/bin/activate
+        python -c "import torch; print(torch.cuda.is_available())"
+        python -c "import tensorflow as tf; print(tf.test.is_built_with_cuda(), tf.config.list_physical_devices('GPU'))"
+    # Only two pytest workers: higher concurrency was too much on GPU.
+    - name: Run all non-slow tests on GPU
+      env:
+        OMP_NUM_THREADS: "1"
+        USE_CUDA: "yes"
+      run: |
+        source .env/bin/activate
+        python -m pytest -n 2 --dist=loadfile -s -v ./tests/
diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml
new file mode 100644
index 0000000000..7c33d5dfcb
--- /dev/null
+++ b/.github/workflows/self-scheduled.yml
@@ -0,0 +1,50 @@
+# Daily / on-demand run of the whole test suite (RUN_SLOW enabled) on GPU.
+name: Self-hosted runner (scheduled)
+
+on:
+  push:
+    branches:
+      - "ci_*"
+  repository_dispatch:
+  schedule:
+    - cron: "0 0 * * *"  # once a day at 00:00 (GitHub schedules use UTC)
+
+jobs:
+  run_all_tests_torch_and_tf_gpu:
+    runs-on: self-hosted
+    steps:
+    - uses: actions/checkout@v2
+    - name: Python version
+      run: |
+        which python
+        python --version
+        pip --version
+    - name: Current dir
+      run: pwd
+    - run: nvidia-smi
+    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
+      run: |
+        python -m venv .env
+        source .env/bin/activate
+        which python
+        python --version
+        pip --version
+    - name: Install dependencies
+      run: |
+        source .env/bin/activate
+        pip install .[sklearn,tf,torch,testing]
+    # Diagnostic only: GPU visibility is printed, not asserted.
+    - name: Are GPUs recognized by our DL frameworks
+      run: |
+        source .env/bin/activate
+        python -c "import torch; print(torch.cuda.is_available())"
+        python -c "import tensorflow as tf; print(tf.test.is_built_with_cuda(), tf.config.list_physical_devices('GPU'))"
+    # Env values are quoted so they always reach the tests as plain strings.
+    - name: Run all tests on GPU
+      env:
+        OMP_NUM_THREADS: "1"
+        RUN_SLOW: "yes"
+        USE_CUDA: "yes"
+      run: |
+        source .env/bin/activate
+        python -m pytest -n 1 --dist=loadfile -s -v ./tests/