[ci] Run all tests on (self-hosted) GPU (#3020)

* Create self-hosted.yml * Update self-hosted.yml * Update self-hosted.yml * Update self-hosted.yml * Update self-hosted.yml * Update self-hosted.yml * do not run slow tests, for now * [ci] For comparison with circleci, let's also run CPU-tests * [ci] reorganize * clearer filenames * [ci] Final tweaks before merging * rm slow tests on circle ci * Trigger CI * On GPU this concurrency was way too high
2020-02-28 21:11:08 -05:00
parent 908fa43b54
commit e36bd94345
4 changed files with 116 additions and 16 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -14,22 +14,6 @@ jobs:
            - run: sudo pip install codecov pytest-cov
            - run: python -m pytest -n 8 --dist=loadfile -s -v ./tests/ --cov
            - run: codecov
    # Job: install the package with the mecab, sklearn, tf-cpu, torch and
    # testing extras inside a Python 3.5 container, then run everything under
    # ./tests/ with pytest (no coverage upload in this job).
    run_all_tests_torch_and_tf:
        working_directory: ~/transformers
        docker:
            - image: circleci/python:3.5
        environment:
            OMP_NUM_THREADS: 1
            # NOTE(review): presumably these two flags opt in to the slow and
            # custom-tokenizer tests; the code that reads them is not visible
            # here -- confirm. They are unquoted, so the value the process
            # sees depends on how the YAML parser types `yes`.
            RUN_SLOW: yes
            RUN_CUSTOM_TOKENIZERS: yes
        resource_class: xlarge
        parallelism: 1
        steps:
            - checkout
            - run: sudo pip install .[mecab,sklearn,tf-cpu,torch,testing]
            - run:
                # pytest-xdist: 8 worker processes, tests grouped per file
                # (--dist=loadfile); -s disables output capture, -v is verbose.
                command: python -m pytest -n 8 --dist=loadfile -s -v ./tests/
                # Let the command stay silent for up to 4 hours before the
                # step is considered hung.
                no_output_timeout: 4h
    run_tests_torch:
        working_directory: ~/transformers
--- a/.github/workflows/github-push.yml
+++ b/.github/workflows/github-push.yml
@@ -0,0 +1,19 @@
 # Workflow for GitHub-hosted runners: on every push, check out the repository,
 # set up Python 3.7 and install the package with the tf, torch and quality
 # extras.
 name: GitHub-hosted runner
 on: push
 jobs:
  check_code_quality:
    runs-on: ubuntu-18.04
    steps:
    - uses: actions/checkout@v2
    - name: Set up Python
      uses: actions/setup-python@v1
      with:
        # Quoted so the version is a string, not a YAML float
        # (an unquoted 3.10 would be read as 3.1).
        python-version: "3.7"
    - name: Install dependencies
      run: |
        pip install .[tf,torch,quality]
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@@ -0,0 +1,47 @@
 # Workflow for the self-hosted GPU runner: on pushes to master and on pull
 # requests, build a fresh virtualenv, install the package with the sklearn,
 # tf, torch and testing extras, and run ./tests/ with pytest.
 name: Self-hosted runner (push)
 on:
  push:
    branches:
      - master
  # NOTE(review): this trigger runs pull-request code on the self-hosted
  # machine. GitHub's documentation advises caution with self-hosted runners
  # when pull requests can come from forks -- confirm this is intended.
  pull_request:
 jobs:
  run_tests_torch_and_tf_gpu:
    runs-on: self-hosted
    steps:
    - uses: actions/checkout@v2
    # Diagnostic steps: record which interpreter, pip, working directory and
    # GPU state the runner provides before anything is installed.
    - name: Python version
      run: |
        which python
        python --version
        pip --version
    - name: Current dir
      run: pwd
    - run: nvidia-smi
    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
      run: |
        python -m venv .env
        source .env/bin/activate
        which python
        python --version
        pip --version
    # Every later step re-activates .env because each `run` step starts its
    # own shell.
    - name: Install dependencies
      run: |
        source .env/bin/activate
        pip install .[sklearn,tf,torch,testing]
    # Informational only: the results are printed, nothing is asserted, so
    # this step does not fail when no GPU is detected.
    - name: Are GPUs recognized by our DL frameworks
      run: |
        source .env/bin/activate
        python -c "import torch; print(torch.cuda.is_available())"
        python -c "import tensorflow as tf; print(tf.test.is_built_with_cuda(), tf.config.list_physical_devices('GPU'))"
    - name: Run all non-slow tests on GPU
      env:
        # Environment values are quoted so they reach the process as the
        # literal strings "1" and "yes" regardless of YAML type inference.
        OMP_NUM_THREADS: "1"
        USE_CUDA: "yes"
      # pytest-xdist: 2 worker processes, tests grouped per file.
      run: |
        source .env/bin/activate
        python -m pytest -n 2 --dist=loadfile -s -v ./tests/
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -0,0 +1,50 @@
 # Workflow for the self-hosted GPU runner: nightly (00:00 UTC cron), on
 # repository_dispatch events and on pushes to ci_* branches, build a fresh
 # virtualenv, install the package with the sklearn, tf, torch and testing
 # extras, and run ./tests/ with pytest with RUN_SLOW set.
 name: Self-hosted runner (scheduled)
 on:
  push:
    branches:
      # Quoted so the glob is unambiguously a plain string.
      - 'ci_*'
  repository_dispatch:
  schedule:
    - cron: "0 0 * * *"
 jobs:
  run_all_tests_torch_and_tf_gpu:
    runs-on: self-hosted
    steps:
    - uses: actions/checkout@v2
    # Diagnostic steps: record which interpreter, pip, working directory and
    # GPU state the runner provides before anything is installed.
    - name: Python version
      run: |
        which python
        python --version
        pip --version
    - name: Current dir
      run: pwd
    - run: nvidia-smi
    - name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
      run: |
        python -m venv .env
        source .env/bin/activate
        which python
        python --version
        pip --version
    # Every later step re-activates .env because each `run` step starts its
    # own shell.
    - name: Install dependencies
      run: |
        source .env/bin/activate
        pip install .[sklearn,tf,torch,testing]
    # Informational only: the results are printed, nothing is asserted, so
    # this step does not fail when no GPU is detected.
    - name: Are GPUs recognized by our DL frameworks
      run: |
        source .env/bin/activate
        python -c "import torch; print(torch.cuda.is_available())"
        python -c "import tensorflow as tf; print(tf.test.is_built_with_cuda(), tf.config.list_physical_devices('GPU'))"
    - name: Run all tests on GPU
      env:
        # Environment values are quoted so they reach the process as the
        # literal strings "1" and "yes" regardless of YAML type inference.
        OMP_NUM_THREADS: "1"
        RUN_SLOW: "yes"
        USE_CUDA: "yes"
      # pytest-xdist: a single worker process, tests grouped per file.
      run: |
        source .env/bin/activate
        python -m pytest -n 1 --dist=loadfile -s -v ./tests/