From ff8b88a948fc2f6aba421ca64ad165291928dcee Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Tue, 26 Aug 2025 22:02:15 +0200
Subject: [PATCH] Fix nightly torch CI (#40469)

Co-authored-by: ydshieh
---
 .github/workflows/self-nightly-caller.yml     | 22 +++++++++++++++++++
 docker/transformers-all-latest-gpu/Dockerfile |  7 ++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/self-nightly-caller.yml b/.github/workflows/self-nightly-caller.yml
index 6192c8039f..48edaf8187 100644
--- a/.github/workflows/self-nightly-caller.yml
+++ b/.github/workflows/self-nightly-caller.yml
@@ -12,12 +12,34 @@ on:
     branches:
       - run_ci_with_nightly_torch*
 
+# Used for `push` to easily modify the target workflow runs to compare against
+env:
+  prev_workflow_run_id: ""
+  other_workflow_run_id: ""
+
+
 jobs:
   build_nightly_torch_ci_images:
     name: Build CI Docker Images with nightly torch
     uses: ./.github/workflows/build-nightly-ci-docker-images.yml
     secrets: inherit
 
+  setup:
+    name: Setup
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Setup
+        run: |
+          mkdir "setup_values"
+          echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
+          echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: setup_values
+          path: setup_values
+
   model-ci:
     name: Model CI
     needs: build_nightly_torch_ci_images
diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile
index 1f1cabe356..66aef6578f 100644
--- a/docker/transformers-all-latest-gpu/Dockerfile
+++ b/docker/transformers-all-latest-gpu/Dockerfile
@@ -32,7 +32,10 @@ RUN python3 -m pip uninstall -y flax jax
 
 RUN python3 -m pip install --no-cache-dir -U timm
 
-RUN python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git pytesseract
+RUN if [ "$PYTORCH" != "pre" ]; then python3 -m pip install --no-cache-dir git+https://github.com/facebookresearch/detectron2.git; else echo "Don't install detectron2 with nightly torch"; fi
+
+RUN python3 -m pip install --no-cache-dir pytesseract
+
 RUN python3 -m pip install -U "itsdangerous<2.1.0"
 
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/accelerate@main#egg=accelerate
@@ -52,7 +55,7 @@ RUN python3 -m pip install --no-cache-dir bitsandbytes
 RUN python3 -m pip install --no-cache-dir quanto
 
 # After using A10 as CI runner, let's run FA2 tests
-RUN python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation
+RUN if [ "$PYTORCH" != "pre" ]; then python3 -m pip uninstall -y ninja && python3 -m pip install --no-cache-dir ninja && python3 -m pip install flash-attn --no-cache-dir --no-build-isolation; else echo "Don't install FA2 with nightly torch"; fi
 
 # TODO (ydshieh): check this again
 # `quanto` will install `ninja` which leads to many `CUDA error: an illegal memory access ...` in some model tests