(Re-)Enable Nightly + Past CI (#22393)
* Enable Nightly + Past CI * put schedule --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
59
.github/workflows/build-docker-images.yml
vendored
59
.github/workflows/build-docker-images.yml
vendored
@@ -3,7 +3,7 @@ name: Build docker images (scheduled)
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- docker-image*
|
||||
- build_ci_docker_image*
|
||||
repository_dispatch:
|
||||
workflow_call:
|
||||
inputs:
|
||||
@@ -67,35 +67,6 @@ jobs:
|
||||
push: true
|
||||
tags: huggingface/transformers-all-latest-gpu-push-ci
|
||||
|
||||
latest-with-torch-nightly-docker:
|
||||
name: "Nightly PyTorch + Stable TensorFlow"
|
||||
# Push CI doesn't need this image
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v3
|
||||
with:
|
||||
context: ./docker/transformers-all-latest-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
PYTORCH=pre
|
||||
push: true
|
||||
tags: huggingface/transformers-all-latest-torch-nightly-gpu
|
||||
|
||||
latest-torch-deepspeed-docker:
|
||||
name: "Latest PyTorch + DeepSpeed"
|
||||
runs-on: ubuntu-latest
|
||||
@@ -153,34 +124,6 @@ jobs:
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-deepspeed-latest-gpu-push-ci
|
||||
|
||||
nightly-torch-deepspeed-docker:
|
||||
name: "Nightly PyTorch + DeepSpeed"
|
||||
# Push CI doesn't need this image
|
||||
if: inputs.image_postfix != '-push-ci'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v3
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-deepspeed-nightly-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu
|
||||
|
||||
doc-builder:
|
||||
name: "Doc builder"
|
||||
# Push CI doesn't need this image
|
||||
|
||||
75
.github/workflows/build-nightly-ci-docker-images.yml
vendored
Normal file
75
.github/workflows/build-nightly-ci-docker-images.yml
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
name: Build docker images (Nightly CI)
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
push:
|
||||
branches:
|
||||
- build_nightly_ci_docker_image*
|
||||
|
||||
concurrency:
|
||||
group: docker-images-builds
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
latest-with-torch-nightly-docker:
|
||||
name: "Nightly PyTorch + Stable TensorFlow"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Cleanup disk
|
||||
run: |
|
||||
sudo ls -l /usr/local/lib/
|
||||
sudo ls -l /usr/share/
|
||||
sudo du -sh /usr/local/lib/
|
||||
sudo du -sh /usr/share/
|
||||
sudo rm -rf /usr/local/lib/android
|
||||
sudo rm -rf /usr/share/dotnet
|
||||
sudo du -sh /usr/local/lib/
|
||||
sudo du -sh /usr/share/
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v3
|
||||
with:
|
||||
context: ./docker/transformers-all-latest-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
PYTORCH=pre
|
||||
push: true
|
||||
tags: huggingface/transformers-all-latest-torch-nightly-gpu
|
||||
|
||||
nightly-torch-deepspeed-docker:
|
||||
name: "Nightly PyTorch + DeepSpeed"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v3
|
||||
with:
|
||||
context: ./docker/transformers-pytorch-deepspeed-nightly-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
push: true
|
||||
tags: huggingface/transformers-pytorch-deepspeed-nightly-gpu
|
||||
@@ -3,7 +3,7 @@ name: Build docker images (Past CI)
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- past-ci-docker-image*
|
||||
- build_past_ci_docker_image*
|
||||
|
||||
concurrency:
|
||||
group: docker-images-builds
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
version: ["1.11", "1.10", "1.9", "1.8", "1.7", "1.6", "1.5", "1.4"]
|
||||
version: ["1.13", "1.12", "1.11", "1.10", "1.9"]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
@@ -24,6 +24,17 @@ jobs:
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
id: get-base-image
|
||||
name: Get Base Image
|
||||
env:
|
||||
framework_version: ${{ matrix.version }}
|
||||
run: |
|
||||
echo "base_image=$(python3 -c 'import os; from utils.past_ci_versions import past_versions_testing; base_image = past_versions_testing["pytorch"][os.environ["framework_version"]]["base_image"]; print(base_image)')" >> $GITHUB_OUTPUT
|
||||
-
|
||||
name: Print Base Image
|
||||
run: |
|
||||
echo ${{ steps.get-base-image.outputs.base_image }}
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
@@ -37,6 +48,7 @@ jobs:
|
||||
context: ./docker/transformers-past-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
BASE_DOCKER_IMAGE=${{ steps.get-base-image.outputs.base_image }}
|
||||
FRAMEWORK=pytorch
|
||||
VERSION=${{ matrix.version }}
|
||||
push: true
|
||||
@@ -47,7 +59,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
version: ["2.8", "2.7", "2.6", "2.5"]
|
||||
version: ["2.11", "2.10", "2.9", "2.8", "2.7", "2.6", "2.5"]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
@@ -56,6 +68,17 @@ jobs:
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
id: get-base-image
|
||||
name: Get Base Image
|
||||
env:
|
||||
framework_version: ${{ matrix.version }}
|
||||
run: |
|
||||
echo "base_image=$(python3 -c 'import os; from utils.past_ci_versions import past_versions_testing; base_image = past_versions_testing["tensorflow"][os.environ["framework_version"]]["base_image"]; print(base_image)')" >> $GITHUB_OUTPUT
|
||||
-
|
||||
name: Print Base Image
|
||||
run: |
|
||||
echo ${{ steps.get-base-image.outputs.base_image }}
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
@@ -69,40 +92,8 @@ jobs:
|
||||
context: ./docker/transformers-past-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
BASE_DOCKER_IMAGE=${{ steps.get-base-image.outputs.base_image }}
|
||||
FRAMEWORK=tensorflow
|
||||
VERSION=${{ matrix.version }}
|
||||
push: true
|
||||
tags: huggingface/transformers-tensorflow-past-${{ matrix.version }}-gpu
|
||||
|
||||
past-tensorflow-docker-2-4:
|
||||
name: "Past TensorFlow Docker"
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
version: ["2.4"]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
-
|
||||
name: Check out code
|
||||
uses: actions/checkout@v3
|
||||
-
|
||||
name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_PASSWORD }}
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v3
|
||||
with:
|
||||
context: ./docker/transformers-past-gpu
|
||||
build-args: |
|
||||
REF=main
|
||||
BASE_DOCKER_IMAGE=nvidia/cuda:11.0.3-cudnn8-devel-ubuntu20.04
|
||||
FRAMEWORK=tensorflow
|
||||
VERSION=${{ matrix.version }}
|
||||
push: true
|
||||
tags: huggingface/transformers-tensorflow-past-${{ matrix.version }}-gpu
|
||||
143
.github/workflows/self-nightly-past-ci-caller.yml
vendored
Normal file
143
.github/workflows/self-nightly-past-ci-caller.yml
vendored
Normal file
@@ -0,0 +1,143 @@
|
||||
name: Self-hosted runner (nightly-past-ci-caller)
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# 2 am on each Sunday and Thursday
|
||||
- cron: "0 2 * * 0,4"
|
||||
push:
|
||||
branches:
|
||||
- run_nightly_ci*
|
||||
- run_past_ci*
|
||||
|
||||
jobs:
|
||||
build_nightly_ci_images:
|
||||
name: Build Nightly CI Docker Images
|
||||
if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci'))
|
||||
uses: ./.github/workflows/build-nightly-ci-docker-images.yml
|
||||
secrets: inherit
|
||||
|
||||
run_nightly_ci:
|
||||
name: Nightly CI
|
||||
needs: [build_nightly_ci_images]
|
||||
uses: ./.github/workflows/self-nightly-scheduled.yml
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-13:
|
||||
name: PyTorch 1.13
|
||||
if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
|
||||
needs: [run_nightly_ci]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.13"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-12:
|
||||
name: PyTorch 1.12
|
||||
if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
|
||||
needs: [run_past_ci_pytorch_1-13]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.12"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-11:
|
||||
name: PyTorch 1.11
|
||||
if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
|
||||
needs: [run_past_ci_pytorch_1-12]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.11"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-10:
|
||||
name: PyTorch 1.10
|
||||
if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
|
||||
needs: [run_past_ci_pytorch_1-11]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.10"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-9:
|
||||
name: PyTorch 1.9
|
||||
if: (cancelled() != true) && ((github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci')))
|
||||
needs: [run_past_ci_pytorch_1-10]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.9"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-11:
|
||||
name: TensorFlow 2.11
|
||||
if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
|
||||
needs: [run_past_ci_pytorch_1-9]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.11"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-10:
|
||||
name: TensorFlow 2.10
|
||||
if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
|
||||
needs: [run_past_ci_tensorflow_2-11]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.10"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-9:
|
||||
name: TensorFlow 2.9
|
||||
if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
|
||||
needs: [run_past_ci_tensorflow_2-10]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.9"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-8:
|
||||
name: TensorFlow 2.8
|
||||
if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
|
||||
needs: [run_past_ci_tensorflow_2-9]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.8"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-7:
|
||||
name: TensorFlow 2.7
|
||||
if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
|
||||
needs: [run_past_ci_tensorflow_2-8]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.7"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-6:
|
||||
name: TensorFlow 2.6
|
||||
if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
|
||||
needs: [run_past_ci_tensorflow_2-7]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.6"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-5:
|
||||
name: TensorFlow 2.5
|
||||
if: (cancelled() != true) && ((github.event_name == 'push') && startsWith(github.ref_name, 'run_past_ci'))
|
||||
needs: [run_past_ci_tensorflow_2-6]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.5"
|
||||
secrets: inherit
|
||||
34
.github/workflows/self-nightly-scheduled.yml
vendored
34
.github/workflows/self-nightly-scheduled.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Self-hosted runner (nightly)
|
||||
name: Self-hosted runner (nightly-ci)
|
||||
|
||||
# Note that each job's dependencies go into a corresponding docker file.
|
||||
#
|
||||
@@ -8,9 +8,7 @@ name: Self-hosted runner (nightly)
|
||||
|
||||
on:
|
||||
repository_dispatch:
|
||||
# Disable temporarily until the test suite can be run under 12 hours.
|
||||
# schedule:
|
||||
# - cron: "0 16 * * *"
|
||||
workflow_call:
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
@@ -33,7 +31,7 @@ jobs:
|
||||
fetch-depth: 2
|
||||
|
||||
- name: Check Runner Status
|
||||
run: python utils/check_self_hosted_runner.py --target_runners single-gpu-scheduled-ci-runner-docker,multi-gpu-scheduled-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
run: python utils/check_self_hosted_runner.py --target_runners single-gpu-past-ci-runner-docker,multi-gpu-past-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
|
||||
check_runners:
|
||||
name: Check Runners
|
||||
@@ -41,7 +39,7 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }}
|
||||
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-torch-nightly-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
@@ -56,7 +54,7 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }}
|
||||
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-torch-nightly-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
@@ -96,7 +94,7 @@ jobs:
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||
machine_type: [single-gpu]
|
||||
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }}
|
||||
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-torch-nightly-gpu
|
||||
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
@@ -143,7 +141,7 @@ jobs:
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
||||
run_tests_multi_gpu:
|
||||
@@ -153,7 +151,7 @@ jobs:
|
||||
matrix:
|
||||
folders: ${{ fromJson(needs.setup.outputs.matrix) }}
|
||||
machine_type: [multi-gpu]
|
||||
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }}
|
||||
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
|
||||
container:
|
||||
image: huggingface/transformers-all-latest-torch-nightly-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
@@ -200,7 +198,7 @@ jobs:
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
||||
run_all_tests_torch_cuda_extensions_gpu:
|
||||
@@ -209,7 +207,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker') }}
|
||||
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
|
||||
needs: setup
|
||||
container:
|
||||
image: huggingface/transformers-pytorch-deepspeed-nightly-gpu
|
||||
@@ -258,7 +256,7 @@ jobs:
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly
|
||||
path: /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||
|
||||
send_results:
|
||||
@@ -292,7 +290,7 @@ jobs:
|
||||
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
|
||||
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }}
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
CI_EVENT: nightly-build
|
||||
CI_EVENT: Nightly CI
|
||||
RUNNER_STATUS: ${{ needs.check_runner_status.result }}
|
||||
RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
|
||||
SETUP_STATUS: ${{ needs.setup.result }}
|
||||
@@ -302,3 +300,11 @@ jobs:
|
||||
pip install slack_sdk
|
||||
pip show slack_sdk
|
||||
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
|
||||
|
||||
|
||||
# delete-artifact
|
||||
- uses: geekyeggo/delete-artifact@v2
|
||||
with:
|
||||
name: |
|
||||
single-*
|
||||
multi-*
|
||||
136
.github/workflows/self-past-caller.yml
vendored
136
.github/workflows/self-past-caller.yml
vendored
@@ -1,136 +0,0 @@
|
||||
name: Self-hosted runner (past-ci-caller)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- run-past-ci*
|
||||
|
||||
jobs:
|
||||
run_past_ci_pytorch_1-11:
|
||||
name: PyTorch 1.11
|
||||
if: always()
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.11"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-10:
|
||||
name: PyTorch 1.10
|
||||
if: always()
|
||||
needs: [run_past_ci_pytorch_1-11]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.10"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-9:
|
||||
name: PyTorch 1.9
|
||||
if: always()
|
||||
needs: [run_past_ci_pytorch_1-10]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.9"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-8:
|
||||
name: PyTorch 1.8
|
||||
if: always()
|
||||
needs: [run_past_ci_pytorch_1-9]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.8"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-7:
|
||||
name: PyTorch 1.7
|
||||
if: always()
|
||||
needs: [run_past_ci_pytorch_1-8]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.7"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-6:
|
||||
name: PyTorch 1.6
|
||||
if: always()
|
||||
needs: [run_past_ci_pytorch_1-7]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.6"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-5:
|
||||
name: PyTorch 1.5
|
||||
if: always()
|
||||
needs: [run_past_ci_pytorch_1-6]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.5"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_pytorch_1-4:
|
||||
name: PyTorch 1.4
|
||||
if: always()
|
||||
needs: [run_past_ci_pytorch_1-5]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: pytorch
|
||||
version: "1.4"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-8:
|
||||
name: TensorFlow 2.8
|
||||
if: always()
|
||||
needs: [run_past_ci_pytorch_1-4]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.8"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-7:
|
||||
name: TensorFlow 2.7
|
||||
if: always()
|
||||
needs: [run_past_ci_tensorflow_2-8]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.7"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-6:
|
||||
name: TensorFlow 2.6
|
||||
if: always()
|
||||
needs: [run_past_ci_tensorflow_2-7]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.6"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-5:
|
||||
name: TensorFlow 2.5
|
||||
if: always()
|
||||
needs: [run_past_ci_tensorflow_2-6]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.5"
|
||||
secrets: inherit
|
||||
|
||||
run_past_ci_tensorflow_2-4:
|
||||
name: TensorFlow 2.4
|
||||
if: always()
|
||||
needs: [run_past_ci_tensorflow_2-5]
|
||||
uses: ./.github/workflows/self-past.yml
|
||||
with:
|
||||
framework: tensorflow
|
||||
version: "2.4"
|
||||
secrets: inherit
|
||||
83
.github/workflows/self-past.yml
vendored
83
.github/workflows/self-past.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Self-hosted runner (past)
|
||||
name: Self-hosted runner (past-ci)
|
||||
|
||||
# Note that each job's dependencies go into a corresponding docker file.
|
||||
#
|
||||
@@ -157,7 +157,7 @@ jobs:
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
||||
run_tests_multi_gpu:
|
||||
@@ -223,14 +223,80 @@ jobs:
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports
|
||||
name: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}
|
||||
|
||||
run_all_tests_torch_cuda_extensions_gpu:
|
||||
name: Torch CUDA extension tests
|
||||
if: inputs.framework == 'pytorch'
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
machine_type: [single-gpu, multi-gpu]
|
||||
runs-on: ${{ format('{0}-{1}', matrix.machine_type, 'docker-past-ci') }}
|
||||
needs: setup
|
||||
container:
|
||||
image: huggingface/transformers-${{ inputs.framework }}-past-${{ inputs.version }}-gpu
|
||||
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||
steps:
|
||||
- name: Update clone
|
||||
working-directory: /transformers
|
||||
run: git fetch && git checkout ${{ github.sha }}
|
||||
|
||||
- name: Remove cached torch extensions
|
||||
run: rm -rf /github/home/.cache/torch_extensions/
|
||||
|
||||
# To avoid unknown test failures
|
||||
- name: Pre build DeepSpeed *again*
|
||||
working-directory: /
|
||||
run: |
|
||||
python3 -m pip uninstall -y deepspeed
|
||||
rm -rf DeepSpeed
|
||||
git clone https://github.com/microsoft/DeepSpeed && cd DeepSpeed && rm -rf build
|
||||
DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_AIO=1 DS_BUILD_UTILS=1 python3 -m pip install . --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check
|
||||
|
||||
- name: NVIDIA-SMI
|
||||
run: |
|
||||
nvidia-smi
|
||||
|
||||
- name: Environment
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 utils/print_env.py
|
||||
|
||||
- name: Show installed libraries and their versions
|
||||
working-directory: /transformers
|
||||
run: pip freeze
|
||||
|
||||
- name: Run all tests on GPU
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu tests/deepspeed tests/extended
|
||||
|
||||
- name: Failure short reports
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt
|
||||
|
||||
- name: Test suite reports artifacts
|
||||
if: ${{ always() }}
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}
|
||||
path: /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu
|
||||
|
||||
send_results:
|
||||
name: Send results to webhook
|
||||
runs-on: ubuntu-latest
|
||||
if: always()
|
||||
needs: [check_runner_status, check_runners, setup, run_tests_single_gpu, run_tests_multi_gpu]
|
||||
needs: [
|
||||
check_runner_status,
|
||||
check_runners,
|
||||
setup,
|
||||
run_tests_single_gpu,
|
||||
run_tests_multi_gpu,
|
||||
run_all_tests_torch_cuda_extensions_gpu
|
||||
]
|
||||
steps:
|
||||
- name: Preliminary job status
|
||||
shell: bash
|
||||
@@ -272,4 +338,11 @@ jobs:
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }}
|
||||
path: test_failure_tables
|
||||
path: test_failure_tables
|
||||
|
||||
# delete-artifact
|
||||
- uses: geekyeggo/delete-artifact@v2
|
||||
with:
|
||||
name: |
|
||||
single-*
|
||||
multi-*
|
||||
3
.github/workflows/self-scheduled.yml
vendored
3
.github/workflows/self-scheduled.yml
vendored
@@ -10,6 +10,9 @@ on:
|
||||
repository_dispatch:
|
||||
schedule:
|
||||
- cron: "0 2 * * *"
|
||||
push:
|
||||
branches:
|
||||
- run_scheduled_ci*
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
|
||||
Reference in New Issue
Block a user