[Testing] Add Flax Tests on GPU, Add Speech and Vision to Flax & TF tests (#13313)

* up * finish * Apply suggestions from code review * apply Lysandres suggestions * adapt circle ci as well * finish * Update setup.py
2021-08-31 11:08:22 +02:00
parent 8b2de0e483
commit 062300ba7f
4 changed files with 215 additions and 24 deletions
--- a/.github/workflows/self-push.yml
+++ b/.github/workflows/self-push.yml
@@ -33,7 +33,7 @@ jobs:
          apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
          apt install -y libsndfile1-dev
          pip install --upgrade pip
-          pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,vision,timm]
+          pip install .[sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]

      - name: Launcher docker
        uses: actions/checkout@v2
@@ -78,6 +78,61 @@ jobs:
          name: run_all_tests_torch_gpu_test_reports
          path: reports

+  # Flax job of the self-push workflow: installs CUDA-enabled JAX and the Flax
+  # extras inside a GPU container and runs the test suite on a single-GPU
+  # self-hosted runner.
+  run_tests_flax_gpu:
+    runs-on: [self-hosted, docker-gpu, single-gpu]
+    container:
+      # TensorFlow's GPU image is the base; CUDA-enabled JAX is installed on top.
+      image: tensorflow/tensorflow:2.4.1-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      # git is installed from the git-core PPA before the checkout step runs.
+      # pip is upgraded first, in the same order as the scheduled Flax jobs.
+      # NOTE(review): `pip install .` runs before the checkout below, so it
+      # presumably installs whatever sources are already in the workspace -
+      # confirm this is intended.
+      - name: Install dependencies
+        run: |
+          apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
+          pip install --upgrade pip
+          pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
+          pip install .[sklearn,testing,sentencepiece,flax,flax-speech,vision]
+
+      - name: Launcher docker
+        uses: actions/checkout@v2
+        with:
+          # Fetch the last two commits; presumably so the (currently disabled)
+          # test fetcher can diff against the previous commit.
+          fetch-depth: 2
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      # Prints the JAX backend platform and the number of local devices.
+      - name: Are GPUs recognized by our DL frameworks
+        run: |
+          python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
+          python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
+
+      # The test fetcher is disabled for this job. The upload of its output is
+      # disabled with it, because no other step creates test_preparation.txt.
+      # Re-enable both steps together.
+#      - name: Fetch the tests to run
+#        run: |
+#          python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
+#
+#      - name: Report fetched tests
+#        uses: actions/upload-artifact@v2
+#        with:
+#          name: test_fetched
+#          path: test_preparation.txt
+
+      # `tests` is passed explicitly, as in the scheduled Flax jobs; with no
+      # path pytest collects from the current directory (unless `testpaths` is
+      # configured), not just the test suite.
+      - name: Run all non-slow tests on GPU
+        run: |
+          python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_gpu tests
+#          if [ -f test_list.txt ]; then
+#            python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_gpu $(cat test_list.txt)
+#          fi
+
+      # Only runs when an earlier step failed.
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: cat reports/tests_flax_gpu_failures_short.txt
+
+      # Uploads the `reports` directory whether or not the tests passed.
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: run_all_tests_flax_gpu_test_reports
+          path: reports
+
 #  run_tests_tf_gpu:
 #    runs-on: [self-hosted, docker-gpu, single-gpu]
 #    timeout-minutes: 120
@@ -89,7 +144,7 @@ jobs:
 #        run: |
 #          apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
 #          pip install --upgrade pip
-#          pip install .[sklearn,testing,onnxruntime,sentencepiece]
+#          pip install .[sklearn,testing,onnxruntime,sentencepiece,tf-speech]
 #
 #      - name: Launcher docker
 #        uses: actions/checkout@v2
@@ -147,7 +202,7 @@ jobs:
          apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
          apt install -y libsndfile1-dev
          pip install --upgrade pip
-          pip install .[sklearn,testing,onnxruntime,sentencepiece,speech,vision,timm]
+          pip install .[sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]
      
      - name: Launcher docker
        uses: actions/checkout@v2
@@ -195,6 +250,61 @@ jobs:
          name: run_all_tests_torch_multi_gpu_test_reports
          path: reports

+  # Flax job of the self-push workflow: installs CUDA-enabled JAX and the Flax
+  # extras inside a GPU container and runs the test suite on a multi-GPU
+  # self-hosted runner.
+  run_tests_flax_multi_gpu:
+    runs-on: [self-hosted, docker-gpu, multi-gpu]
+    container:
+      # TensorFlow's GPU image is the base; CUDA-enabled JAX is installed on top.
+      image: tensorflow/tensorflow:2.4.1-gpu
+      # NOTE(review): this multi-GPU job passes `--gpus 0`, the same value as
+      # the single-GPU Flax job - confirm the container is meant to see every
+      # GPU on the runner.
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      # git is installed from the git-core PPA before the checkout step runs.
+      # pip is upgraded first, in the same order as the scheduled Flax jobs.
+      # NOTE(review): `pip install .` runs before the checkout below, so it
+      # presumably installs whatever sources are already in the workspace -
+      # confirm this is intended.
+      - name: Install dependencies
+        run: |
+          apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
+          pip install --upgrade pip
+          pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
+          pip install .[sklearn,testing,sentencepiece,flax,flax-speech,vision]
+
+      - name: Launcher docker
+        uses: actions/checkout@v2
+        with:
+          # Fetch the last two commits; presumably so the (currently disabled)
+          # test fetcher can diff against the previous commit.
+          fetch-depth: 2
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      # Prints the JAX backend platform and the number of local devices.
+      - name: Are GPUs recognized by our DL frameworks
+        run: |
+          python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
+          python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
+
+      # The test fetcher is disabled for this job. The upload of its output is
+      # disabled with it, because no other step creates test_preparation.txt.
+      # Re-enable both steps together.
+#      - name: Fetch the tests to run
+#        run: |
+#          python utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt
+#
+#      - name: Report fetched tests
+#        uses: actions/upload-artifact@v2
+#        with:
+#          name: test_fetched
+#          path: test_preparation.txt
+
+      # `tests` is passed explicitly, as in the scheduled Flax jobs; with no
+      # path pytest collects from the current directory (unless `testpaths` is
+      # configured), not just the test suite.
+      - name: Run all non-slow tests on GPU
+        run: |
+          python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_multi_gpu tests
+#          if [ -f test_list.txt ]; then
+#            python -m pytest -n 2 --dist=loadfile -v --make-reports=tests_flax_multi_gpu $(cat test_list.txt)
+#          fi
+
+      # Only runs when an earlier step failed.
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: cat reports/tests_flax_multi_gpu_failures_short.txt
+
+      # Uploads the `reports` directory whether or not the tests passed.
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: run_all_tests_flax_multi_gpu_test_reports
+          path: reports
+
 #  run_tests_tf_multi_gpu:
 #    runs-on: [self-hosted, docker-gpu, multi-gpu]
 #    timeout-minutes: 120
@@ -206,7 +316,7 @@ jobs:
 #        run: |
 #          apt -y update && apt install -y software-properties-common && apt -y update && add-apt-repository -y ppa:git-core/ppa && apt -y update && apt install -y git
 #          pip install --upgrade pip
-#          pip install .[sklearn,testing,onnxruntime,sentencepiece]
+#          pip install .[sklearn,testing,onnxruntime,sentencepiece,tf-speech]
 #
 #      - name: Launcher docker
 #        uses: actions/checkout@v2
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -34,7 +34,7 @@ jobs:
        run: |
          apt -y update && apt install -y libsndfile1-dev git
          pip install --upgrade pip
-          pip install .[integrations,sklearn,testing,onnxruntime,sentencepiece,speech,vision,timm]
+          pip install .[integrations,sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]

      - name: Are GPUs recognized by our DL frameworks
        run: |
@@ -85,6 +85,45 @@ jobs:
          name: run_all_tests_torch_gpu_test_reports
          path: reports

+  # Flax job of the self-scheduled workflow: checks out the repository, installs
+  # CUDA-enabled JAX and the Flax extras inside a GPU container, and runs the
+  # `tests` directory on a single-GPU self-hosted runner.
+  run_all_tests_flax_gpu:
+    runs-on: [self-hosted, docker-gpu, single-gpu]
+    container:
+      # TensorFlow's GPU image is the base; CUDA-enabled JAX is installed on top.
+      image: tensorflow/tensorflow:2.4.1-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      # The checkout comes first, so `pip install .` below installs the
+      # checked-out sources.
+      - name: Launcher docker
+        uses: actions/checkout@v2
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      # pip is upgraded before the JAX wheels and the project extras are installed.
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
+          pip install .[flax,integrations,sklearn,testing,sentencepiece,flax-speech,vision]
+
+      # Prints the JAX backend platform and the number of local devices.
+      - name: Are GPUs recognized by our DL frameworks
+        run: |
+          python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
+          python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
+
+      # Runs everything under `tests` with one xdist worker; reports use the
+      # `tests_flax_gpu` prefix.
+      - name: Run all tests on GPU
+        run: |
+          python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_flax_gpu tests
+
+      # NOTE(review): despite the step name this runs on every outcome
+      # (`always()`), whereas the push workflow's Flax jobs use `failure()` -
+      # confirm that is intended.
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: cat reports/tests_flax_gpu_failures_short.txt
+
+      # Uploads the `reports` directory whether or not the tests passed.
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: run_all_tests_flax_gpu_test_reports
+          path: reports
+
  run_all_tests_tf_gpu:
    runs-on: [self-hosted, docker-gpu, single-gpu]
    container:
@@ -102,7 +141,7 @@ jobs:
        run: |
          apt -y update && apt install -y git
          pip install --upgrade pip
-          pip install .[sklearn,testing,onnx,sentencepiece]
+          pip install .[sklearn,testing,onnx,sentencepiece,tf-speech]

      - name: Are GPUs recognized by our DL frameworks
        run: |
@@ -158,7 +197,7 @@ jobs:
        run: |
          apt -y update && apt install -y libsndfile1-dev git
          pip install --upgrade pip
-          pip install .[integrations,sklearn,testing,onnxruntime,sentencepiece,speech,vision,timm]
+          pip install .[integrations,sklearn,testing,onnxruntime,sentencepiece,torch-speech,vision,timm]

      - name: Are GPUs recognized by our DL frameworks
        run: |
@@ -213,7 +252,7 @@ jobs:
        run: |
          apt -y update && apt install -y git
          pip install --upgrade pip
-          pip install .[sklearn,testing,onnx,sentencepiece]
+          pip install .[sklearn,testing,onnx,sentencepiece,tf-speech]

      - name: Are GPUs recognized by our DL frameworks
        run: |
@@ -251,6 +290,45 @@ jobs:
          name: run_all_tests_tf_multi_gpu_test_reports
          path: reports

+  # Flax job of the self-scheduled workflow: checks out the repository, installs
+  # CUDA-enabled JAX and the Flax extras inside a GPU container, and runs the
+  # `tests` directory on a multi-GPU self-hosted runner.
+  run_all_tests_flax_multi_gpu:
+    runs-on: [self-hosted, docker-gpu, multi-gpu]
+    container:
+      # TensorFlow's GPU image is the base; CUDA-enabled JAX is installed on top.
+      image: tensorflow/tensorflow:2.4.1-gpu
+      # NOTE(review): this multi-GPU job passes `--gpus 0`, the same value as
+      # the single-GPU Flax job - confirm the container is meant to see every
+      # GPU on the runner.
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    steps:
+      # The checkout comes first, so `pip install .` below installs the
+      # checked-out sources.
+      - name: Launcher docker
+        uses: actions/checkout@v2
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      # pip is upgraded before the JAX wheels and the project extras are installed.
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html
+          pip install .[flax,integrations,sklearn,testing,sentencepiece,flax-speech,vision]
+
+      # Prints the JAX backend platform and the number of local devices.
+      - name: Are GPUs recognized by our DL frameworks
+        run: |
+          python -c "from jax.lib import xla_bridge; print('GPU available:', xla_bridge.get_backend().platform)"
+          python -c "import jax; print('Number of GPUs available:', len(jax.local_devices()))"
+
+      # The report prefix and the artifact name below carry `multi_gpu` so this
+      # job's results do not share a name with run_all_tests_flax_gpu.
+      - name: Run all tests on GPU
+        run: |
+          python -m pytest -n 1 -v --dist=loadfile --make-reports=tests_flax_multi_gpu tests
+
+      # NOTE(review): despite the step name this runs on every outcome
+      # (`always()`), whereas the push workflow's Flax jobs use `failure()` -
+      # confirm that is intended.
+      - name: Failure short reports
+        if: ${{ always() }}
+        run: cat reports/tests_flax_multi_gpu_failures_short.txt
+
+      # Uploads the `reports` directory whether or not the tests passed.
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: run_all_tests_flax_multi_gpu_test_reports
+          path: reports
+
  run_all_tests_torch_cuda_extensions_gpu:
    runs-on: [self-hosted, docker-gpu, single-gpu]
    container: