new failure CI reports for all jobs (#38298)
* new failures * report_repo_id * report_repo_id * report_repo_id * More fixes * More fixes * More fixes * ruff --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -9,6 +9,18 @@ on:
|
||||
start_sha:
|
||||
required: true
|
||||
type: string
|
||||
job:
|
||||
required: true
|
||||
type: string
|
||||
slack_report_channel:
|
||||
required: true
|
||||
type: string
|
||||
ci_event:
|
||||
required: true
|
||||
type: string
|
||||
report_repo_id:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
|
||||
env:
|
||||
@@ -26,7 +38,7 @@ env:
|
||||
|
||||
|
||||
jobs:
|
||||
run_models_gpu:
|
||||
check_new_failures:
|
||||
name: " "
|
||||
runs-on:
|
||||
group: aws-g4dn-4xlarge-cache
|
||||
@@ -36,17 +48,17 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: ci_results_run_models_gpu
|
||||
path: /transformers/ci_results_run_models_gpu
|
||||
name: ci_results_${{ inputs.job }}
|
||||
path: /transformers/ci_results_${{ inputs.job }}
|
||||
|
||||
- name: Check file
|
||||
working-directory: /transformers
|
||||
run: |
|
||||
if [ -f ci_results_run_models_gpu/new_model_failures.json ]; then
|
||||
echo "`ci_results_run_models_gpu/new_model_failures.json` exists, continue ..."
|
||||
if [ -f ci_results_${{ inputs.job }}/new_failures.json ]; then
|
||||
echo "`ci_results_${{ inputs.job }}/new_failures.json` exists, continue ..."
|
||||
echo "process=true" >> $GITHUB_ENV
|
||||
else
|
||||
echo "`ci_results_run_models_gpu/new_model_failures.json` doesn't exist, abort."
|
||||
echo "`ci_results_${{ inputs.job }}/new_failures.json` doesn't exist, abort."
|
||||
echo "process=false" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
@@ -112,14 +124,14 @@ jobs:
|
||||
- name: Check failed tests
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_run_models_gpu/new_model_failures.json --output_file new_model_failures_with_bad_commit.json
|
||||
run: python3 utils/check_bad_commit.py --start_commit ${{ inputs.start_sha }} --end_commit ${{ env.END_SHA }} --file ci_results_${{ inputs.job }}/new_failures.json --output_file new_failures_with_bad_commit.json
|
||||
|
||||
- name: Show results
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: |
|
||||
ls -l new_model_failures_with_bad_commit.json
|
||||
cat new_model_failures_with_bad_commit.json
|
||||
ls -l new_failures_with_bad_commit.json
|
||||
cat new_failures_with_bad_commit.json
|
||||
|
||||
- name: Checkout back
|
||||
working-directory: /transformers
|
||||
@@ -134,6 +146,8 @@ jobs:
|
||||
env:
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
|
||||
JOB_NAME: ${{ inputs.job }}
|
||||
REPORT_REPO_ID: ${{ inputs.report_repo_id }}
|
||||
run: |
|
||||
python3 utils/process_bad_commit_report.py
|
||||
|
||||
@@ -144,6 +158,8 @@ jobs:
|
||||
env:
|
||||
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
|
||||
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
|
||||
JOB_NAME: ${{ inputs.job }}
|
||||
REPORT_REPO_ID: ${{ inputs.report_repo_id }}
|
||||
run: |
|
||||
{
|
||||
echo 'REPORT_TEXT<<EOF'
|
||||
@@ -151,17 +167,31 @@ jobs:
|
||||
echo EOF
|
||||
} >> "$GITHUB_ENV"
|
||||
|
||||
- name: Prepare Slack report title
|
||||
working-directory: /transformers
|
||||
if: ${{ env.process == 'true' }}
|
||||
run: |
|
||||
pip install slack_sdk
|
||||
echo "title=$(python3 -c 'import sys; sys.path.append("utils"); from utils.notification_service import job_to_test_map; ci_event = "${{ inputs.ci_event }}"; job = "${{ inputs.job }}"; test_name = job_to_test_map[job]; title = f"New failed tests of {ci_event}" + ":" + f" {test_name}"; print(title)')" >> $GITHUB_ENV
|
||||
|
||||
- name: Send processed report
|
||||
if: ${{ env.process == 'true' && !endsWith(env.REPORT_TEXT, '{}') }}
|
||||
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
|
||||
with:
|
||||
# Slack channel id, channel name, or user id to post message.
|
||||
# See also: https://api.slack.com/methods/chat.postMessage#channels
|
||||
channel-id: '#transformers-ci-feedback-tests'
|
||||
channel-id: '#${{ inputs.slack_report_channel }}'
|
||||
# For posting a rich message using Block Kit
|
||||
payload: |
|
||||
{
|
||||
"blocks": [
|
||||
{
|
||||
"type": "header",
|
||||
"text": {
|
||||
"type": "plain_text",
|
||||
"text": "${{ env.title }}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "section",
|
||||
"text": {
|
||||
@@ -19,6 +19,7 @@ jobs:
|
||||
runner: mi210
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi210
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
@@ -30,6 +31,7 @@ jobs:
|
||||
runner: mi210
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi210
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
@@ -41,6 +43,7 @@ jobs:
|
||||
runner: mi210
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi210
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
@@ -52,4 +55,5 @@ jobs:
|
||||
runner: mi210
|
||||
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi210
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
@@ -19,6 +19,7 @@ jobs:
|
||||
runner: mi250
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi250
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
@@ -30,6 +31,7 @@ jobs:
|
||||
runner: mi250
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi250
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
@@ -41,6 +43,7 @@ jobs:
|
||||
runner: mi250
|
||||
docker: huggingface/transformers-pytorch-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi250
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
@@ -52,4 +55,5 @@ jobs:
|
||||
runner: mi250
|
||||
docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
|
||||
ci_event: Scheduled CI (AMD) - mi250
|
||||
report_repo_id: optimum-amd/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
7
.github/workflows/self-scheduled-caller.yml
vendored
7
.github/workflows/self-scheduled-caller.yml
vendored
@@ -54,6 +54,7 @@ jobs:
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
torch-pipeline:
|
||||
@@ -65,6 +66,7 @@ jobs:
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-pytorch-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
tf-pipeline:
|
||||
@@ -76,6 +78,7 @@ jobs:
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-tensorflow-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
example-ci:
|
||||
@@ -87,6 +90,7 @@ jobs:
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
trainer-fsdp-ci:
|
||||
@@ -98,6 +102,7 @@ jobs:
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-all-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
deepspeed-ci:
|
||||
@@ -110,6 +115,7 @@ jobs:
|
||||
docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
|
||||
ci_event: Daily CI
|
||||
working-directory-prefix: /workspace
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
quantization-ci:
|
||||
@@ -121,4 +127,5 @@ jobs:
|
||||
runner: daily-ci
|
||||
docker: huggingface/transformers-quantization-latest-gpu
|
||||
ci_event: Daily CI
|
||||
report_repo_id: hf-internal-testing/transformers_daily_ci
|
||||
secrets: inherit
|
||||
|
||||
19
.github/workflows/self-scheduled.yml
vendored
19
.github/workflows/self-scheduled.yml
vendored
@@ -28,6 +28,10 @@ on:
|
||||
default: ''
|
||||
required: false
|
||||
type: string
|
||||
report_repo_id:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
|
||||
env:
|
||||
HF_HOME: /mnt/cache
|
||||
@@ -584,15 +588,22 @@ jobs:
|
||||
folder_slices: ${{ needs.setup.outputs.folder_slices }}
|
||||
quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
|
||||
ci_event: ${{ inputs.ci_event }}
|
||||
report_repo_id: ${{ inputs.report_repo_id }}
|
||||
|
||||
secrets: inherit
|
||||
|
||||
check_new_model_failures:
|
||||
if: ${{ always() && inputs.ci_event == 'Daily CI' && inputs.job == 'run_models_gpu' && needs.send_results.result == 'success' }}
|
||||
name: Check new model failures
|
||||
check_new_failures:
|
||||
# TODO: work on `run_quantization_torch_gpu`
|
||||
if: ${{ always() && inputs.ci_event == 'Daily CI' && inputs.job != 'run_quantization_torch_gpu' && needs.send_results.result == 'success' }}
|
||||
name: Check new failures
|
||||
needs: send_results
|
||||
uses: ./.github/workflows/check_failed_model_tests.yml
|
||||
uses: ./.github/workflows/check_failed_tests.yml
|
||||
with:
|
||||
docker: ${{ inputs.docker }}
|
||||
start_sha: ${{ github.sha }}
|
||||
job: ${{ inputs.job }}
|
||||
slack_report_channel: ${{ inputs.slack_report_channel }}
|
||||
ci_event: ${{ inputs.ci_event }}
|
||||
report_repo_id: ${{ inputs.report_repo_id }}
|
||||
|
||||
secrets: inherit
|
||||
|
||||
5
.github/workflows/slack-report.yml
vendored
5
.github/workflows/slack-report.yml
vendored
@@ -21,6 +21,9 @@ on:
|
||||
ci_event:
|
||||
required: true
|
||||
type: string
|
||||
report_repo_id:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
env:
|
||||
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
|
||||
@@ -67,6 +70,7 @@ jobs:
|
||||
CI_SHA: ${{ github.sha }}
|
||||
CI_TEST_JOB: ${{ inputs.job }}
|
||||
SETUP_STATUS: ${{ inputs.setup_status }}
|
||||
REPORT_REPO_ID: ${{ inputs.report_repo_id }}
|
||||
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
|
||||
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
|
||||
# For a job that doesn't depend on (i.e. `needs`) `setup`, the value for `inputs.folder_slices` would be an
|
||||
@@ -96,6 +100,7 @@ jobs:
|
||||
CI_SHA: ${{ github.sha }}
|
||||
CI_TEST_JOB: ${{ inputs.job }}
|
||||
SETUP_STATUS: ${{ inputs.setup_status }}
|
||||
REPORT_REPO_ID: ${{ inputs.report_repo_id }}
|
||||
# We pass `needs.setup.outputs.quantization_matrix` as the argument. A processing in `notification_service_quantization.py` to change
|
||||
# `quantization/bnb` to `quantization_bnb` is required, as the artifact names use `_` instead of `/`.
|
||||
run: |
|
||||
|
||||
Reference in New Issue
Block a user