Update notification service (#17921)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
13
.github/workflows/self-past.yml
vendored
13
.github/workflows/self-past.yml
vendored
@@ -164,6 +164,11 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- uses: actions/download-artifact@v2
|
- uses: actions/download-artifact@v2
|
||||||
|
|
||||||
|
# Create a directory to store test failure tables in the next step
|
||||||
|
- name: Create directory
|
||||||
|
run: mkdir test_failure_tables
|
||||||
|
|
||||||
- name: Send message to Slack
|
- name: Send message to Slack
|
||||||
env:
|
env:
|
||||||
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
|
||||||
@@ -177,3 +182,11 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
pip install slack_sdk
|
pip install slack_sdk
|
||||||
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
|
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
|
||||||
|
|
||||||
|
# Upload complete failure tables, as they might be big and only truncated versions could be sent to Slack.
|
||||||
|
- name: Failure table artifacts
|
||||||
|
if: ${{ always() }}
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
with:
|
||||||
|
name: test_failure_tables_${{ inputs.framework }}-${{ inputs.version }}
|
||||||
|
path: test_failure_tables
|
||||||
@@ -233,15 +233,11 @@ class Message:
|
|||||||
individual_reports.append(key)
|
individual_reports.append(key)
|
||||||
|
|
||||||
header = "Single | Multi | Category\n"
|
header = "Single | Multi | Category\n"
|
||||||
category_failures_report = header + "\n".join(sorted(individual_reports))
|
category_failures_report = prepare_reports(
|
||||||
|
title="The following modeling categories had failures", header=header, reports=individual_reports
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {"type": "section", "text": {"type": "mrkdwn", "text": category_failures_report}}
|
||||||
"type": "section",
|
|
||||||
"text": {
|
|
||||||
"type": "mrkdwn",
|
|
||||||
"text": f"The following modeling categories had failures:\n\n```\n{category_failures_report}\n```",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def model_failures(self) -> Dict:
|
def model_failures(self) -> Dict:
|
||||||
@@ -302,21 +298,44 @@ class Message:
|
|||||||
|
|
||||||
model_header = "Single PT | Multi PT | Single TF | Multi TF | Other | Category\n"
|
model_header = "Single PT | Multi PT | Single TF | Multi TF | Other | Category\n"
|
||||||
sorted_model_reports = sorted(model_reports, key=lambda s: s.split("] ")[-1])
|
sorted_model_reports = sorted(model_reports, key=lambda s: s.split("] ")[-1])
|
||||||
model_failures_report = model_header + "\n".join(sorted_model_reports)
|
model_failures_report = prepare_reports(
|
||||||
|
title="These following model modules had failures", header=model_header, reports=sorted_model_reports
|
||||||
|
)
|
||||||
|
|
||||||
module_header = "Single | Multi | Category\n"
|
module_header = "Single | Multi | Category\n"
|
||||||
sorted_module_reports = sorted(other_module_reports, key=lambda s: s.split("] ")[-1])
|
sorted_module_reports = sorted(other_module_reports, key=lambda s: s.split("] ")[-1])
|
||||||
module_failures_report = module_header + "\n".join(sorted_module_reports)
|
module_failures_report = prepare_reports(
|
||||||
|
title="The following non-model modules had failures", header=module_header, reports=sorted_module_reports
|
||||||
|
)
|
||||||
|
|
||||||
report = ""
|
model_failure_sections = [
|
||||||
|
{"type": "section", "text": {"type": "mrkdwn", "text": model_failures_report}},
|
||||||
|
{"type": "section", "text": {"type": "mrkdwn", "text": module_failures_report}},
|
||||||
|
]
|
||||||
|
|
||||||
if len(model_reports):
|
# Save complete tables (for past CI) - to be uploaded as artifacts
|
||||||
report += f"These following model modules had failures:\n```\n{model_failures_report}\n```\n\n"
|
if ci_event.startswith("Past CI"):
|
||||||
|
model_failures_report = prepare_reports(
|
||||||
|
title="These following model modules had failures",
|
||||||
|
header=model_header,
|
||||||
|
reports=sorted_model_reports,
|
||||||
|
to_truncate=False,
|
||||||
|
)
|
||||||
|
file_path = os.path.join(os.getcwd(), "test_failure_tables/model_failures_report.txt")
|
||||||
|
with open(file_path, "w", encoding="UTF-8") as fp:
|
||||||
|
fp.write(model_failures_report)
|
||||||
|
|
||||||
if len(other_module_reports):
|
module_failures_report = prepare_reports(
|
||||||
report += f"The following non-model modules had failures:\n```\n{module_failures_report}\n```\n\n"
|
title="The following non-model modules had failures",
|
||||||
|
header=module_header,
|
||||||
|
reports=sorted_module_reports,
|
||||||
|
to_truncate=False,
|
||||||
|
)
|
||||||
|
file_path = os.path.join(os.getcwd(), "test_failure_tables/module_failures_report.txt")
|
||||||
|
with open(file_path, "w", encoding="UTF-8") as fp:
|
||||||
|
fp.write(module_failures_report)
|
||||||
|
|
||||||
return {"type": "section", "text": {"type": "mrkdwn", "text": report}}
|
return model_failure_sections
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def additional_failures(self) -> Dict:
|
def additional_failures(self) -> Dict:
|
||||||
@@ -337,15 +356,11 @@ class Message:
|
|||||||
individual_reports.append(report)
|
individual_reports.append(report)
|
||||||
|
|
||||||
header = "Single | Multi | Category\n"
|
header = "Single | Multi | Category\n"
|
||||||
failures_report = header + "\n".join(sorted(individual_reports))
|
failures_report = prepare_reports(
|
||||||
|
title="The following non-modeling tests had failures", header=header, reports=individual_reports
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {"type": "section", "text": {"type": "mrkdwn", "text": failures_report}}
|
||||||
"type": "section",
|
|
||||||
"text": {
|
|
||||||
"type": "mrkdwn",
|
|
||||||
"text": f"The following non-modeling tests had failures:\n```\n{failures_report}\n```",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def payload(self) -> str:
|
def payload(self) -> str:
|
||||||
@@ -358,7 +373,10 @@ class Message:
|
|||||||
blocks.append(self.failures)
|
blocks.append(self.failures)
|
||||||
|
|
||||||
if self.n_model_failures > 0:
|
if self.n_model_failures > 0:
|
||||||
blocks.extend([self.category_failures, self.model_failures])
|
blocks.append(self.category_failures)
|
||||||
|
for block in self.model_failures:
|
||||||
|
if block["text"]["text"]:
|
||||||
|
blocks.append(block)
|
||||||
|
|
||||||
if self.n_additional_failures > 0:
|
if self.n_additional_failures > 0:
|
||||||
blocks.append(self.additional_failures)
|
blocks.append(self.additional_failures)
|
||||||
@@ -582,6 +600,29 @@ def retrieve_available_artifacts():
|
|||||||
return _available_artifacts
|
return _available_artifacts
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_reports(title, header, reports, to_truncate=True):
|
||||||
|
report = ""
|
||||||
|
|
||||||
|
MAX_ERROR_TEXT = 3000 - len("[Truncated]")
|
||||||
|
if not to_truncate:
|
||||||
|
MAX_ERROR_TEXT = float("inf")
|
||||||
|
|
||||||
|
if len(reports) > 0:
|
||||||
|
# `text` must be less than 3001 characters in Slack SDK
|
||||||
|
# keep some room for adding "[Truncated]" when necessary
|
||||||
|
|
||||||
|
for idx in range(len(reports)):
|
||||||
|
_report = header + "\n".join(reports[: idx + 1])
|
||||||
|
new_report = f"{title}:\n```\n{_report}\n```\n"
|
||||||
|
if len(new_report) > MAX_ERROR_TEXT:
|
||||||
|
# `report` here has length <= 3000
|
||||||
|
report = report + "[Truncated]"
|
||||||
|
break
|
||||||
|
report = new_report
|
||||||
|
|
||||||
|
return report
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
org = "huggingface"
|
org = "huggingface"
|
||||||
@@ -686,16 +727,24 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
unclassified_model_failures = []
|
unclassified_model_failures = []
|
||||||
|
|
||||||
|
# This prefix is used to get job links below. For past CI, we use `workflow_call`, which changes the job names from
|
||||||
|
# `Model tests (...)` to `PyTorch 1.5 / Model tests (...)` for example.
|
||||||
|
job_name_prefix = ""
|
||||||
|
if ci_event.startswith("Past CI - "):
|
||||||
|
framework, version = ci_event.replace("Past CI - ", "").split("-")
|
||||||
|
framework = "PyTorch" if framework == "pytorch" else "TensorFlow"
|
||||||
|
job_name_prefix = f"{framework} {version}"
|
||||||
|
|
||||||
for model in model_results.keys():
|
for model in model_results.keys():
|
||||||
for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths:
|
for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths:
|
||||||
artifact = retrieve_artifact(artifact_path["name"], artifact_path["gpu"])
|
artifact = retrieve_artifact(artifact_path["name"], artifact_path["gpu"])
|
||||||
if "stats" in artifact:
|
if "stats" in artifact:
|
||||||
# Link to the GitHub Action job
|
# Link to the GitHub Action job
|
||||||
model_results[model]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(
|
# The job names use `matrix.folder` which contain things like `models/bert` instead of `models_bert`
|
||||||
# The job names use `matrix.folder` which contain things like `models/bert` instead of `models_bert`
|
job_name = f"Model tests ({model.replace('models_', 'models/')}, {artifact_path['gpu']}-gpu)"
|
||||||
f"Model tests ({model.replace('models_', 'models/')}, {artifact_path['gpu']}-gpu)"
|
if job_name_prefix:
|
||||||
)
|
job_name = f"{job_name_prefix} / {job_name}"
|
||||||
|
model_results[model]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(job_name)
|
||||||
failed, success, time_spent = handle_test_results(artifact["stats"])
|
failed, success, time_spent = handle_test_results(artifact["stats"])
|
||||||
model_results[model]["success"] += success
|
model_results[model]["success"] += success
|
||||||
model_results[model]["time_spent"] += time_spent[1:-1] + ", "
|
model_results[model]["time_spent"] += time_spent[1:-1] + ", "
|
||||||
@@ -809,7 +858,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
message = Message(title, ci_title, model_results, additional_results)
|
message = Message(title, ci_title, model_results, additional_results)
|
||||||
|
|
||||||
# send report only if there is any failure
|
# send report only if there is any failure (for push CI)
|
||||||
if message.n_failures:
|
if message.n_failures or ci_event != "push":
|
||||||
message.post()
|
message.post()
|
||||||
message.post_reply()
|
message.post_reply()
|
||||||
|
|||||||
Reference in New Issue
Block a user