From cebb07262ff71724aa2c49f54d38c32e933fe8c7 Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Thu, 25 Apr 2024 10:23:40 +0200 Subject: [PATCH] Workflow / ENH: Add SSH into our runners workflow (#30425) * add SSH into our runners workflow * fix * fix * fix * use our previous approaches * forward contrib credits from discussions --------- Co-authored-by: Yih-Dar --- .github/workflows/ssh-runner.yml | 60 ++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 .github/workflows/ssh-runner.yml diff --git a/.github/workflows/ssh-runner.yml b/.github/workflows/ssh-runner.yml new file mode 100644 index 0000000000..b5a0d05af7 --- /dev/null +++ b/.github/workflows/ssh-runner.yml @@ -0,0 +1,60 @@ +name: SSH into our runners + +on: + workflow_dispatch: + inputs: + runner_type: + description: 'Type of runner to test (a10 or t4)' + required: true + docker_image: + description: 'Name of the Docker image' + required: true + +env: + IS_GITHUB_CI: "1" + HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + OMP_NUM_THREADS: 8 + MKL_NUM_THREADS: 8 + RUN_SLOW: yes # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. # This token is created under the bot `hf-transformers-bot`. + SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} + TF_FORCE_GPU_ALLOW_GROWTH: true + RUN_PT_TF_CROSS_TESTS: 1 + +jobs: + ssh_runner: + name: SSH + runs-on: [single-gpu, nvidia-gpu, ${{ github.event.inputs.runner_type }}, ci] + container: + image: ${{ github.event.inputs.docker_image }} + options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + + steps: + - name: Update clone + working-directory: /transformers + run: | + git fetch && git checkout ${{ github.sha }} + + - name: Cleanup + working-directory: /transformers + run: | + rm -rf tests/__pycache__ + rm -rf tests/models/__pycache__ + rm -rf reports + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: NVIDIA-SMI + run: | + nvidia-smi + + - name: Tailscale # In order to be able to SSH when a test fails + uses: huggingface/tailscale-action@v1 + with: + authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }} + slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }} + slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} + waitForSSH: true