Add checks for some workflow jobs (#18583)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
34
.github/workflows/self-push.yml
vendored
34
.github/workflows/self-push.yml
vendored
@@ -111,9 +111,24 @@ jobs:
|
|||||||
echo "::set-output name=matrix::$keys"
|
echo "::set-output name=matrix::$keys"
|
||||||
echo "::set-output name=test_map::$test_map"
|
echo "::set-output name=test_map::$test_map"
|
||||||
|
|
||||||
|
run_check_runners:
|
||||||
|
name: Check Runners
|
||||||
|
needs: setup
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
machine_type: [single-gpu, multi-gpu]
|
||||||
|
runs-on: [self-hosted, docker-gpu, '${{ matrix.machine_type }}']
|
||||||
|
container:
|
||||||
|
image: huggingface/transformers-all-latest-gpu
|
||||||
|
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
|
||||||
|
steps:
|
||||||
|
- name: NVIDIA-SMI
|
||||||
|
run: |
|
||||||
|
nvidia-smi
|
||||||
|
|
||||||
run_tests_single_gpu:
|
run_tests_single_gpu:
|
||||||
name: Model tests
|
name: Model tests
|
||||||
needs: setup
|
needs: [setup, run_check_runners]
|
||||||
# `dummy` means there is no test to run
|
# `dummy` means there is no test to run
|
||||||
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
||||||
strategy:
|
strategy:
|
||||||
@@ -198,7 +213,7 @@ jobs:
|
|||||||
|
|
||||||
run_tests_multi_gpu:
|
run_tests_multi_gpu:
|
||||||
name: Model tests
|
name: Model tests
|
||||||
needs: setup
|
needs: [setup, run_check_runners]
|
||||||
# `dummy` means there is no test to run
|
# `dummy` means there is no test to run
|
||||||
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
if: contains(fromJson(needs.setup.outputs.matrix), 'dummy') != true
|
||||||
strategy:
|
strategy:
|
||||||
@@ -285,7 +300,7 @@ jobs:
|
|||||||
|
|
||||||
run_tests_torch_cuda_extensions_single_gpu:
|
run_tests_torch_cuda_extensions_single_gpu:
|
||||||
name: Torch CUDA extension tests
|
name: Torch CUDA extension tests
|
||||||
needs: setup
|
needs: [setup, run_check_runners]
|
||||||
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
@@ -364,7 +379,7 @@ jobs:
|
|||||||
|
|
||||||
run_tests_torch_cuda_extensions_multi_gpu:
|
run_tests_torch_cuda_extensions_multi_gpu:
|
||||||
name: Torch CUDA extension tests
|
name: Torch CUDA extension tests
|
||||||
needs: setup
|
needs: [setup, run_check_runners]
|
||||||
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
if: contains(fromJson(needs.setup.outputs.matrix), 'deepspeed') || contains(fromJson(needs.setup.outputs.matrix), 'extended')
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
@@ -447,12 +462,20 @@ jobs:
|
|||||||
if: always()
|
if: always()
|
||||||
needs: [
|
needs: [
|
||||||
setup,
|
setup,
|
||||||
|
run_check_runners,
|
||||||
run_tests_single_gpu,
|
run_tests_single_gpu,
|
||||||
run_tests_multi_gpu,
|
run_tests_multi_gpu,
|
||||||
run_tests_torch_cuda_extensions_single_gpu,
|
run_tests_torch_cuda_extensions_single_gpu,
|
||||||
run_tests_torch_cuda_extensions_multi_gpu
|
run_tests_torch_cuda_extensions_multi_gpu
|
||||||
]
|
]
|
||||||
steps:
|
steps:
|
||||||
|
- name: Preliminary job status
|
||||||
|
shell: bash
|
||||||
|
# For the meaning of these environment variables, see the job `Setup`
|
||||||
|
run: |
|
||||||
|
echo "Setup status: ${{ needs.setup.result }}"
|
||||||
|
echo "Runner status: ${{ needs.run_check_runners.result }}"
|
||||||
|
|
||||||
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
# Necessary to get the correct branch name and commit SHA for `workflow_run` event
|
||||||
# We also take into account the `push` event (we might want to test some changes in a branch)
|
# We also take into account the `push` event (we might want to test some changes in a branch)
|
||||||
- name: Prepare custom environment variables
|
- name: Prepare custom environment variables
|
||||||
@@ -498,6 +521,9 @@ jobs:
|
|||||||
CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
|
CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
|
||||||
CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
|
CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
|
||||||
CI_SHA: ${{ env.CI_SHA }}
|
CI_SHA: ${{ env.CI_SHA }}
|
||||||
|
SETUP_STATUS: ${{ needs.setup.result }}
|
||||||
|
RUNNER_STATUS: ${{ needs.run_check_runners.result }}
|
||||||
|
|
||||||
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
|
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
|
||||||
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
|
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -387,28 +387,52 @@ class Message:
|
|||||||
return json.dumps(blocks)
|
return json.dumps(blocks)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def error_out():
|
def error_out(title, ci_title="", setup_failed=False, runner_failed=False):
|
||||||
payload = [
|
|
||||||
{
|
blocks = []
|
||||||
"type": "section",
|
title_block = {"type": "header", "text": {"type": "plain_text", "text": title}}
|
||||||
"text": {
|
blocks.append(title_block)
|
||||||
"type": "plain_text",
|
|
||||||
"text": "There was an issue running the tests.",
|
if ci_title:
|
||||||
},
|
ci_title_block = {"type": "section", "text": {"type": "mrkdwn", "text": ci_title}}
|
||||||
"accessory": {
|
blocks.append(ci_title_block)
|
||||||
"type": "button",
|
|
||||||
"text": {"type": "plain_text", "text": "Check Action results", "emoji": True},
|
if setup_failed:
|
||||||
"url": f"https://github.com/huggingface/transformers/actions/runs/{os.environ['GITHUB_RUN_ID']}",
|
text = "💔 Setup job failed. Tests are not run. 😭"
|
||||||
},
|
elif runner_failed:
|
||||||
}
|
text = "💔 CI runners have problems! Tests are not run. 😭"
|
||||||
]
|
else:
|
||||||
|
text = "💔 There was an issue running the tests. 😭"
|
||||||
|
|
||||||
|
error_block_1 = {
|
||||||
|
"type": "header",
|
||||||
|
"text": {
|
||||||
|
"type": "plain_text",
|
||||||
|
"text": text,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
error_block_2 = {
|
||||||
|
"type": "section",
|
||||||
|
"text": {
|
||||||
|
"type": "plain_text",
|
||||||
|
"text": "🙏 Let's fix it ASAP! 🙏",
|
||||||
|
},
|
||||||
|
"accessory": {
|
||||||
|
"type": "button",
|
||||||
|
"text": {"type": "plain_text", "text": "Check Action results", "emoji": True},
|
||||||
|
"url": f"https://github.com/huggingface/transformers/actions/runs/{os.environ['GITHUB_RUN_ID']}",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
blocks.extend([error_block_1, error_block_2])
|
||||||
|
|
||||||
|
payload = json.dumps(blocks)
|
||||||
|
|
||||||
print("Sending the following payload")
|
print("Sending the following payload")
|
||||||
print(json.dumps({"blocks": json.loads(payload)}))
|
print(json.dumps({"blocks": blocks}))
|
||||||
|
|
||||||
client.chat_postMessage(
|
client.chat_postMessage(
|
||||||
channel=os.environ["CI_SLACK_REPORT_CHANNEL_ID"],
|
channel=os.environ["CI_SLACK_REPORT_CHANNEL_ID"],
|
||||||
text="There was an issue running the tests.",
|
text=text,
|
||||||
blocks=payload,
|
blocks=payload,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -630,6 +654,11 @@ def prepare_reports(title, header, reports, to_truncate=True):
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
setup_status = os.environ.get("SETUP_STATUS")
|
||||||
|
runner_status = os.environ.get("RUNNER_STATUS")
|
||||||
|
setup_failed = True if setup_status is not None and setup_status != "success" else False
|
||||||
|
runner_failed = True if runner_status is not None and runner_status != "success" else False
|
||||||
|
|
||||||
org = "huggingface"
|
org = "huggingface"
|
||||||
repo = "transformers"
|
repo = "transformers"
|
||||||
repository_full_name = f"{org}/{repo}"
|
repository_full_name = f"{org}/{repo}"
|
||||||
@@ -689,6 +718,10 @@ if __name__ == "__main__":
|
|||||||
else:
|
else:
|
||||||
ci_title = ""
|
ci_title = ""
|
||||||
|
|
||||||
|
if setup_failed or runner_failed:
|
||||||
|
Message.error_out(title, ci_title, setup_failed, runner_failed)
|
||||||
|
exit(0)
|
||||||
|
|
||||||
arguments = sys.argv[1:][0]
|
arguments = sys.argv[1:][0]
|
||||||
try:
|
try:
|
||||||
models = ast.literal_eval(arguments)
|
models = ast.literal_eval(arguments)
|
||||||
|
|||||||
Reference in New Issue
Block a user