From f681437203baa7671de3174b0fa583c349d9d5e1 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Tue, 5 Jul 2022 18:08:36 +0200
Subject: [PATCH] Enable Past CI (#17919)

Co-authored-by: ydshieh
---
 .../workflows/build-past-ci-docker-images.yml | 108 +++++++++++
 .github/workflows/self-past-caller.yml        | 126 ++++++++++++
 .github/workflows/self-past.yml               | 179 ++++++++++++++++++
 docker/transformers-past-gpu/Dockerfile       |  43 +++++
 utils/past_ci_versions.py                     | 130 +++++++++++++
 5 files changed, 586 insertions(+)
 create mode 100644 .github/workflows/build-past-ci-docker-images.yml
 create mode 100644 .github/workflows/self-past-caller.yml
 create mode 100644 .github/workflows/self-past.yml
 create mode 100644 docker/transformers-past-gpu/Dockerfile
 create mode 100644 utils/past_ci_versions.py

diff --git a/.github/workflows/build-past-ci-docker-images.yml b/.github/workflows/build-past-ci-docker-images.yml
new file mode 100644
index 0000000000..30e6ab78d6
--- /dev/null
+++ b/.github/workflows/build-past-ci-docker-images.yml
@@ -0,0 +1,108 @@
+name: Build docker images (Past CI)
+
+on:
+  push:
+    branches:
+      - past-ci-docker-image*
+
+concurrency:
+  group: docker-images-builds
+  cancel-in-progress: false
+
+jobs:
+  past-pytorch-docker:
+    name: "Past PyTorch Docker"
+    strategy:
+      fail-fast: false
+      matrix:
+        version: ["1.10", "1.9", "1.8", "1.7", "1.6", "1.5", "1.4"]
+    runs-on: ubuntu-latest
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      -
+        name: Check out code
+        uses: actions/checkout@v2
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: ./docker/transformers-past-gpu
+          build-args: |
+            REF=main
+            FRAMEWORK=pytorch
+            VERSION=${{ matrix.version }}
+          push: true
+          tags: huggingface/transformers-pytorch-past-${{ matrix.version }}-gpu
+
+  past-tensorflow-docker:
+    name: "Past TensorFlow Docker"
+    strategy:
+      fail-fast: false
+      matrix:
+        version: ["2.8", "2.7", "2.6", "2.5"]
+    runs-on: ubuntu-latest
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      -
+        name: Check out code
+        uses: actions/checkout@v2
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: ./docker/transformers-past-gpu
+          build-args: |
+            REF=main
+            FRAMEWORK=tensorflow
+            VERSION=${{ matrix.version }}
+          push: true
+          tags: huggingface/transformers-tensorflow-past-${{ matrix.version }}-gpu
+
+  past-tensorflow-docker-2-4:
+    name: "Past TensorFlow Docker"
+    strategy:
+      fail-fast: false
+      matrix:
+        version: ["2.4"]
+    runs-on: ubuntu-latest
+    steps:
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      -
+        name: Check out code
+        uses: actions/checkout@v2
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: ./docker/transformers-past-gpu
+          build-args: |
+            REF=main
+            BASE_DOCKER_IMAGE=nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04
+            FRAMEWORK=tensorflow
+            VERSION=${{ matrix.version }}
+          push: true
+          tags: huggingface/transformers-tensorflow-past-${{ matrix.version }}-gpu
\ No newline at end of file
diff --git a/.github/workflows/self-past-caller.yml b/.github/workflows/self-past-caller.yml
new file mode 100644
index 0000000000..461a2cc207
--- /dev/null
+++ b/.github/workflows/self-past-caller.yml
@@ -0,0 +1,126 @@
+name: Self-hosted runner (past-ci-caller)
+
+on:
+  push:
+    branches:
+      - run-past-ci*
+
+jobs:
+  run_past_ci_pytorch_1-10:
+    name: PyTorch 1.10
+    if: always()
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.10"
+    secrets: inherit
+
+  run_past_ci_pytorch_1-9:
+    name: PyTorch 1.9
+    if: always()
+    needs: [run_past_ci_pytorch_1-10]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.9"
+    secrets: inherit
+
+  run_past_ci_pytorch_1-8:
+    name: PyTorch 1.8
+    if: always()
+    needs: [run_past_ci_pytorch_1-9]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.8"
+    secrets: inherit
+
+  run_past_ci_pytorch_1-7:
+    name: PyTorch 1.7
+    if: always()
+    needs: [run_past_ci_pytorch_1-8]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.7"
+    secrets: inherit
+
+  run_past_ci_pytorch_1-6:
+    name: PyTorch 1.6
+    if: always()
+    needs: [run_past_ci_pytorch_1-7]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.6"
+    secrets: inherit
+
+  run_past_ci_pytorch_1-5:
+    name: PyTorch 1.5
+    if: always()
+    needs: [run_past_ci_pytorch_1-6]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.5"
+    secrets: inherit
+
+  run_past_ci_pytorch_1-4:
+    name: PyTorch 1.4
+    if: always()
+    needs: [run_past_ci_pytorch_1-5]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: pytorch
+      version: "1.4"
+    secrets: inherit
+
+  run_past_ci_tensorflow_2-8:
+    name: TensorFlow 2.8
+    if: always()
+    needs: [run_past_ci_pytorch_1-4]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: tensorflow
+      version: "2.8"
+    secrets: inherit
+
+  run_past_ci_tensorflow_2-7:
+    name: TensorFlow 2.7
+    if: always()
+    needs: [run_past_ci_tensorflow_2-8]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: tensorflow
+      version: "2.7"
+    secrets: inherit
+
+  run_past_ci_tensorflow_2-6:
+    name: TensorFlow 2.6
+    if: always()
+    needs: [run_past_ci_tensorflow_2-7]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: tensorflow
+      version: "2.6"
+    secrets: inherit
+
+  run_past_ci_tensorflow_2-5:
+    name: TensorFlow 2.5
+    if: always()
+    needs: [run_past_ci_tensorflow_2-6]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: tensorflow
+      version: "2.5"
+    secrets: inherit
+
+  run_past_ci_tensorflow_2-4:
+    name: TensorFlow 2.4
+    if: always()
+    needs: [run_past_ci_tensorflow_2-5]
+    uses: ./.github/workflows/self-past.yml
+    with:
+      framework: tensorflow
+      version: "2.4"
+    secrets: inherit
\ No newline at end of file
diff --git a/.github/workflows/self-past.yml b/.github/workflows/self-past.yml
new file mode 100644
index 0000000000..cfa27895e6
--- /dev/null
+++ b/.github/workflows/self-past.yml
@@ -0,0 +1,179 @@
+name: Self-hosted runner (past)
+
+# Note that each job's dependencies go into a corresponding docker file.
+#
+# For example, for `run_tests_single_gpu` and `run_tests_multi_gpu` the docker image is
+# `huggingface/transformers-<framework>-past-<version>-gpu`, which is built from
+# `docker/transformers-past-gpu/Dockerfile`
+
+on:
+  workflow_call:
+    inputs:
+      framework:
+        required: true
+        type: string
+      version:
+        required: true
+        type: string
+
+env:
+  HF_HOME: /mnt/cache
+  TRANSFORMERS_IS_CI: yes
+  OMP_NUM_THREADS: 8
+  MKL_NUM_THREADS: 8
+  RUN_SLOW: yes
+  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
+  TF_FORCE_GPU_ALLOW_GROWTH: true
+  RUN_PT_TF_CROSS_TESTS: 1
+
+jobs:
+  setup:
+    name: Setup
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - name: Checkout transformers
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 2
+
+      - name: Cleanup
+        run: |
+          rm -rf tests/__pycache__
+          rm -rf tests/models/__pycache__
+          rm -rf reports
+
+      - id: set-matrix
+        name: Identify models to test
+        run: |
+          cd tests
+          echo "::set-output name=matrix::$(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print(d)')"
+
+  run_tests_single_gpu:
+    name: Model tests
+    strategy:
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
+        machine_type: [single-gpu]
+    runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
+    container:
+      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
+      options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
+        # set the artifact folder names (because the character `/` is not allowed).
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+
+  run_tests_multi_gpu:
+    name: Model tests
+    strategy:
+      fail-fast: false
+      matrix:
+        folders: ${{ fromJson(needs.setup.outputs.matrix) }}
+        machine_type: [multi-gpu]
+    runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
+    container:
+      image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
+      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
+    needs: setup
+    steps:
+      - name: Update clone
+        working-directory: /transformers
+        run: git fetch && git checkout ${{ github.sha }}
+
+      - name: Echo folder ${{ matrix.folders }}
+        shell: bash
+        # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to
+        # set the artifact folder names (because the character `/` is not allowed).
+        run: |
+          echo "${{ matrix.folders }}"
+          matrix_folders=${{ matrix.folders }}
+          matrix_folders=${matrix_folders/'models/'/'models_'}
+          echo "$matrix_folders"
+          echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV
+
+      - name: NVIDIA-SMI
+        run: |
+          nvidia-smi
+
+      - name: Environment
+        working-directory: /transformers
+        run: |
+          python3 utils/print_env.py
+
+      - name: Run all tests on GPU
+        working-directory: /transformers
+        run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }} tests/${{ matrix.folders }}
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        continue-on-error: true
+        run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
+          path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
+
+  send_results:
+    name: Send results to webhook
+    runs-on: ubuntu-latest
+    if: always()
+    needs: [setup, run_tests_single_gpu, run_tests_multi_gpu]
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/download-artifact@v2
+      - name: Send message to Slack
+        env:
+          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
+          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
+          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
+          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
+          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }}
+          CI_EVENT: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
+        # We pass `needs.setup.outputs.matrix` as the argument. `notification_service.py` must then change
+        # `models/bert` to `models_bert`, as the artifact names use `_` instead of `/`.
+        run: |
+          pip install slack_sdk
+          python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
\ No newline at end of file
diff --git a/docker/transformers-past-gpu/Dockerfile b/docker/transformers-past-gpu/Dockerfile
new file mode 100644
index 0000000000..826a8f12c2
--- /dev/null
+++ b/docker/transformers-past-gpu/Dockerfile
@@ -0,0 +1,43 @@
+ARG BASE_DOCKER_IMAGE="nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04"
+FROM $BASE_DOCKER_IMAGE
+LABEL maintainer="Hugging Face"
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Use a login shell to read variables from `~/.profile` (to pass dynamically created variables between RUN commands)
+SHELL ["sh", "-lc"]
+
+RUN apt update
+RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs
+RUN git lfs install
+RUN python3 -m pip install --no-cache-dir --upgrade pip
+
+ARG REF=main
+RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
+RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime]
+
+# When installing in editable mode, `transformers` is not recognized as a package.
+# The following line must be added in order for python to be aware of transformers.
+RUN cd transformers && python3 setup.py develop
+
+ARG FRAMEWORK
+ARG VERSION
+
+# Remove all frameworks
+# (`accelerate` requires `torch`, and this causes import issues for TF-only testing)
+RUN python3 -m pip uninstall -y torch torchvision torchaudio accelerate tensorflow jax flax
+
+# Get the libraries and their versions to install, and write installation command to `~/.profile`.
+RUN python3 ./transformers/utils/past_ci_versions.py --framework $FRAMEWORK --version $VERSION
+
+# Install the target framework
+RUN echo "INSTALL_CMD = $INSTALL_CMD"
+RUN $INSTALL_CMD
+
+# Having installation problems for torch-scatter with torch <= 1.6. Disable so we have the same set of tests.
+# (This part will be removed once the logic of using `past_ci_versions.py` is used in other Dockerfile files.)
+# # Use installed torch version for `torch-scatter`.
+# # (The env. variable $CUDA is defined in `past_ci_versions.py`)
+# RUN [ "$FRAMEWORK" = "pytorch" ] && python3 -m pip install --no-cache-dir torch-scatter -f https://data.pyg.org/whl/torch-$(python3 -c "from torch import version; print(version.__version__.split('+')[0])")+$CUDA.html || echo "torch-scatter not to be installed"
+
+RUN python3 -m pip install -U "itsdangerous<2.1.0"
diff --git a/utils/past_ci_versions.py b/utils/past_ci_versions.py
new file mode 100644
index 0000000000..f64decf253
--- /dev/null
+++ b/utils/past_ci_versions.py
@@ -0,0 +1,130 @@
+import argparse
+import os
+
+
+past_versions_testing = {
+    "pytorch": {
+        "1.10": {
+            "torch": "1.10.2",
+            "torchvision": "0.11.3",
+            "torchaudio": "0.10.2",
+            "python": 3.9,
+            "cuda": "cu113",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.10.2 torchvision==0.11.3 torchaudio==0.10.2"
+                " --extra-index-url https://download.pytorch.org/whl/cu113"
+            ),
+        },
+        # torchaudio < 0.10 has no CUDA-enabled binary distributions
+        "1.9": {
+            "torch": "1.9.1",
+            "torchvision": "0.10.1",
+            "torchaudio": "0.9.1",
+            "python": 3.9,
+            "cuda": "cu111",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.9.1 torchvision==0.10.1 torchaudio==0.9.1"
+                " --extra-index-url https://download.pytorch.org/whl/cu111"
+            ),
+        },
+        "1.8": {
+            "torch": "1.8.1",
+            "torchvision": "0.9.1",
+            "torchaudio": "0.8.1",
+            "python": 3.9,
+            "cuda": "cu111",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.8.1 torchvision==0.9.1 torchaudio==0.8.1"
+                " --extra-index-url https://download.pytorch.org/whl/cu111"
+            ),
+        },
+        "1.7": {
+            "torch": "1.7.1",
+            "torchvision": "0.8.2",
+            "torchaudio": "0.7.2",
+            "python": 3.9,
+            "cuda": "cu110",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2"
+                " --extra-index-url https://download.pytorch.org/whl/cu110"
+            ),
+        },
+        "1.6": {
+            "torch": "1.6.0",
+            "torchvision": "0.7.0",
+            "torchaudio": "0.6.0",
+            "python": 3.8,
+            "cuda": "cu101",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.6.0 torchvision==0.7.0 torchaudio==0.6.0"
+                " --extra-index-url https://download.pytorch.org/whl/cu101"
+            ),
+        },
+        "1.5": {
+            "torch": "1.5.1",
+            "torchvision": "0.6.1",
+            "torchaudio": "0.5.1",
+            "python": 3.8,
+            "cuda": "cu101",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.5.1 torchvision==0.6.1 torchaudio==0.5.1"
+                " --extra-index-url https://download.pytorch.org/whl/cu101"
+            ),
+        },
+        "1.4": {
+            "torch": "1.4.0",
+            "torchvision": "0.5.0",
+            "torchaudio": "0.4.0",
+            "python": 3.8,
+            "cuda": "cu100",
+            "install": (
+                "python3 -m pip install --no-cache-dir -U torch==1.4.0 torchvision==0.5.0 torchaudio==0.4.0"
+                " --extra-index-url https://download.pytorch.org/whl/cu100"
+            ),
+        },
+    },
+    "tensorflow": {
+        "2.8": {
+            "tensorflow": "2.8.2",
+            "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.8.2",
+        },
+        "2.7": {
+            "tensorflow": "2.7.3",
+            "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.7.3",
+        },
+        "2.6": {
+            "tensorflow": "2.6.5",
+            "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.6.5",
+        },
+        "2.5": {
+            "tensorflow": "2.5.3",
+            "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.5.3",
+        },
+        # need another `nvidia:cuda` docker image, otherwise GPU not working
+        "2.4": {
+            "tensorflow": "2.4.4",
+            "install": "python3 -m pip install --no-cache-dir -U tensorflow==2.4.4",
+            # This should be specified as a docker build argument.
+            # We keep the information here for reference only.
+            "base_docker": "nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04",
+        },
+    },
+}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser("Choose the framework and version to install")
+    frameworks = sorted(past_versions_testing)
+    parser.add_argument("--framework", required=True, choices=frameworks, help="The framework to install.")
+    parser.add_argument("--version", required=True, help="The version of the framework to install.")
+    args = parser.parse_args()
+    if args.version not in past_versions_testing[args.framework]:
+        parser.error(f"unsupported {args.framework} version: {args.version}")
+    info = past_versions_testing[args.framework][args.version]
+
+    # Appended to `~/.profile` so that later login-shell `RUN` steps in the Dockerfile see the variables.
+    with open(os.path.expanduser("~/.profile"), "a") as profile:
+        for name, value in (("INSTALL_CMD", info["install"]), ("CUDA", info.get("cuda", ""))):
+            line = f"export {name}='{value}'"
+            profile.write(line + "\n")
+            print(line)