From 12ce2941c7b67c0dedac0f0468b3ed854fa940ab Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Thu, 20 Oct 2022 20:36:13 +0200 Subject: [PATCH] Fix docker image build (#19759) * Use 2 jobs for the docker image build (latest torch + DS) * fix * Add comment Co-authored-by: ydshieh --- .github/workflows/build-docker-images.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index 64c8b5bd48..04e5d5a34c 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -111,6 +111,27 @@ jobs: REF=main push: true tags: huggingface/transformers-pytorch-deepspeed-latest-gpu${{ inputs.image_postfix }} + + # Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`) + latest-torch-deepspeed-docker-for-push-ci-daily-build: + name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)" + # Can't run in parallel, otherwise get an error: + # `Error response from daemon: Get "https://registry-1.docker.io/v2/": received unexpected HTTP status: 503 Service Unavailable` + needs: latest-torch-deepspeed-docker + runs-on: ubuntu-latest + steps: + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - + name: Check out code + uses: actions/checkout@v2 + - + name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} # Push CI images still need to be re-built daily - name: Build and push (for Push CI) in a daily basis