Use one utils/notification_service.py (#38379)

* step 1

* step 2

* step 3

* step 4

* step 5

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2025-05-26 16:15:29 +02:00
committed by GitHub
parent 98328fd9a1
commit eb74cf977b
4 changed files with 147 additions and 407 deletions

View File

@@ -31,8 +31,7 @@ from slack_sdk import WebClient
# A map associating the job names (specified by `inputs.job` in a workflow file) with the keys of
# `additional_files`. This is used to remove some entries in `additional_files` that are not concerned by a
# specific job. See below.
# `additional_files`.
job_to_test_map = {
"run_models_gpu": "Models",
"run_trainer_and_fsdp_gpu": "Trainer & FSDP",
@@ -40,6 +39,18 @@ job_to_test_map = {
"run_pipelines_tf_gpu": "TensorFlow pipelines",
"run_examples_gpu": "Examples directory",
"run_torch_cuda_extensions_gpu": "DeepSpeed",
"run_quantization_torch_gpu": "Quantization",
}
# The values are used as the file names where to save the corresponding CI job results.
test_to_result_name = {
"Models": "model",
"Trainer & FSDP": "trainer_and_fsdp",
"PyTorch pipelines": "torch_pipeline",
"TensorFlow pipelines": "tf_pipeline",
"Examples directory": "example",
"DeepSpeed": "deepspeed",
"Quantization": "quantization",
}
NON_MODEL_TEST_MODULES = [
@@ -53,6 +64,8 @@ NON_MODEL_TEST_MODULES = [
"sagemaker",
"trainer",
"utils",
"fsdp",
"quantization",
]
@@ -221,7 +234,6 @@ class Message:
"type": "plain_text",
"text": (
f"There were {self.n_failures} failures, out of {self.n_tests} tests.\n"
f"Number of model failures: {self.n_model_failures}.\n"
f"The suite ran in {self.time}."
),
"emoji": True,
@@ -276,6 +288,10 @@ class Message:
@property
def category_failures(self) -> Dict:
if job_name != "run_models_gpu":
category_failures_report = ""
return {"type": "section", "text": {"type": "mrkdwn", "text": category_failures_report}}
model_failures = [v["failed"] for v in self.model_results.values()]
category_failures = {}
@@ -301,7 +317,7 @@ class Message:
header = "Single | Multi | Category\n"
category_failures_report = prepare_reports(
title="The following modeling categories had failures", header=header, reports=individual_reports
title="The following categories had failures", header=header, reports=individual_reports
)
return {"type": "section", "text": {"type": "mrkdwn", "text": category_failures_report}}
@@ -355,25 +371,40 @@ class Message:
}
for k, v in self.model_results.items():
# The keys in `model_results` may contain things like `models_vit` or `quantization_autoawq`
# Remove the prefix to make the report cleaner.
k = k.replace("models_", "").replace("quantization_", "")
if k in NON_MODEL_TEST_MODULES:
pass
continue
if sum(per_model_sum(v).values()):
dict_failed = dict(v["failed"])
pytorch_specific_failures = dict_failed.pop("PyTorch")
tensorflow_specific_failures = dict_failed.pop("TensorFlow")
other_failures = dicts_to_sum(dict_failed.values())
failures[k] = {
"PyTorch": pytorch_specific_failures,
"TensorFlow": tensorflow_specific_failures,
"other": other_failures,
}
# Model job has a special form for reporting
if job_name == "run_models_gpu":
pytorch_specific_failures = dict_failed.pop("PyTorch")
tensorflow_specific_failures = dict_failed.pop("TensorFlow")
other_failures = dicts_to_sum(dict_failed.values())
failures[k] = {
"PyTorch": pytorch_specific_failures,
"TensorFlow": tensorflow_specific_failures,
"other": other_failures,
}
else:
test_name = job_to_test_map[job_name]
specific_failures = dict_failed.pop(test_name)
failures[k] = {
test_name: specific_failures,
}
model_reports = []
other_module_reports = []
for key, value in non_model_failures.items():
key = key.replace("models_", "").replace("quantization_", "")
if key in NON_MODEL_TEST_MODULES:
device_report = self.get_device_report(value)
@@ -386,44 +417,60 @@ class Message:
other_module_reports.append(report)
for key, value in failures.items():
device_report_values = [
value["PyTorch"]["single"],
value["PyTorch"]["multi"],
value["TensorFlow"]["single"],
value["TensorFlow"]["multi"],
sum(value["other"].values()),
]
# Model job has a special form for reporting
if job_name == "run_models_gpu":
device_report_values = [
value["PyTorch"]["single"],
value["PyTorch"]["multi"],
value["TensorFlow"]["single"],
value["TensorFlow"]["multi"],
sum(value["other"].values()),
]
else:
test_name = job_to_test_map[job_name]
device_report_values = [
value[test_name]["single"],
value[test_name]["multi"],
]
if sum(device_report_values):
device_report = " | ".join([str(x).rjust(9) for x in device_report_values]) + " | "
# This is related to `model_header` below
rjust_width = 9 if job_name == "run_models_gpu" else 6
device_report = " | ".join([str(x).rjust(rjust_width) for x in device_report_values]) + " | "
report = f"{device_report}{key}"
model_reports.append(report)
# (Possibly truncated) reports for the current workflow run - to be sent to Slack channels
model_header = "Single PT | Multi PT | Single TF | Multi TF | Other | Category\n"
if job_name == "run_models_gpu":
model_header = "Single PT | Multi PT | Single TF | Multi TF | Other | Category\n"
else:
model_header = "Single | Multi | Category\n"
# Used when calling `prepare_reports` below to prepare the `title` argument
label = test_to_result_name[job_to_test_map[job_name]]
sorted_model_reports = sorted(model_reports, key=lambda s: s.split("| ")[-1])
model_failures_report = prepare_reports(
title="These following model modules had failures", header=model_header, reports=sorted_model_reports
title=f"These following {label} modules had failures", header=model_header, reports=sorted_model_reports
)
module_header = "Single | Multi | Category\n"
sorted_module_reports = sorted(other_module_reports, key=lambda s: s.split("| ")[-1])
module_failures_report = prepare_reports(
title="The following non-model modules had failures", header=module_header, reports=sorted_module_reports
title=f"The following {label} modules had failures", header=module_header, reports=sorted_module_reports
)
# To be sent to Slack channels
model_failure_sections = [
{"type": "section", "text": {"type": "mrkdwn", "text": model_failures_report}},
{"type": "section", "text": {"type": "mrkdwn", "text": module_failures_report}},
]
model_failure_sections = [{"type": "section", "text": {"type": "mrkdwn", "text": model_failures_report}}]
model_failure_sections.append({"type": "section", "text": {"type": "mrkdwn", "text": module_failures_report}})
# Save the complete (i.e. no truncation) failure tables (of the current workflow run)
# (to be uploaded as artifacts)
model_failures_report = prepare_reports(
title="These following model modules had failures",
title=f"These following {label} modules had failures",
header=model_header,
reports=sorted_model_reports,
to_truncate=False,
@@ -433,7 +480,7 @@ class Message:
fp.write(model_failures_report)
module_failures_report = prepare_reports(
title="The following non-model modules had failures",
title=f"The following {label} modules had failures",
header=module_header,
reports=sorted_module_reports,
to_truncate=False,
@@ -511,7 +558,10 @@ class Message:
blocks.append(self.failures)
if self.n_model_failures > 0:
blocks.append(self.category_failures)
block = self.category_failures
if block["text"]["text"]:
blocks.append(block)
for block in self.model_failures:
if block["text"]["text"]:
blocks.append(block)
@@ -565,7 +615,7 @@ class Message:
pattern = r"<(https://github.com/huggingface/transformers/actions/runs/.+?/job/.+?)\|(.+?)>"
items = re.findall(pattern, line)
elif "tests/" in line:
if "tests/models/" in line:
if "tests/models/" in line or "tests/quantization/" in line:
model = line.split("/")[2]
else:
model = line.split("/")[1]
@@ -609,7 +659,7 @@ class Message:
"text": {
"type": "mrkdwn",
# TODO: We should NOT assume it's always Nvidia CI, but it's the case at this moment.
"text": f"*There are {nb_new_failed_tests} failed tests unique to this run*\n\n(compared to Nvidia CI: <https://github.com/huggingface/transformers/actions/runs/{prev_workflow_run_id}|{prev_workflow_run_id}>)",
"text": f"*There are {nb_new_failed_tests} failed tests unique to {'this run' if not is_amd_daily_ci_workflow else 'AMD'}*\n\n(compared to Nvidia CI: <https://github.com/huggingface/transformers/actions/runs/{prev_workflow_run_id}|{prev_workflow_run_id}>)",
},
"accessory": {
"type": "button",
@@ -1058,13 +1108,24 @@ if __name__ == "__main__":
# In our usage in `.github/workflows/slack-report.yml`, we always pass an argument when calling this script.
# The argument could be an empty string `""` if a job doesn't depend on the job `setup`.
if arguments[0] == "":
models = []
job_matrix = []
else:
model_list_as_str = arguments[0]
job_matrix_as_str = arguments[0]
try:
folder_slices = ast.literal_eval(model_list_as_str)
# Need to change from elements like `models/bert` to `models_bert` (the ones used as artifact names).
models = [x.replace("models/", "models_") for folders in folder_slices for x in folders]
folder_slices = ast.literal_eval(job_matrix_as_str)
if len(folder_slices) > 0:
if isinstance(folder_slices[0], list):
# Need to change from elements like `models/bert` to `models_bert` (the ones used as artifact names).
job_matrix = [
x.replace("models/", "models_").replace("quantization/", "quantization_")
for folders in folder_slices
for x in folders
]
elif isinstance(folder_slices[0], str):
job_matrix = [
x.replace("models/", "models_").replace("quantization/", "quantization_")
for x in folder_slices
]
except Exception:
Message.error_out(title, ci_title)
raise ValueError("Errored out.")
@@ -1084,7 +1145,7 @@ if __name__ == "__main__":
available_artifacts = retrieve_available_artifacts()
modeling_categories = [
test_categories = [
"PyTorch",
"TensorFlow",
"Flax",
@@ -1093,35 +1154,34 @@ if __name__ == "__main__":
"Trainer",
"ONNX",
"Auto",
"Quantization",
"Unclassified",
]
job_name = os.getenv("CI_TEST_JOB")
report_name_prefix = "run_models_gpu"
if job_name == "run_trainer_and_fsdp_gpu":
report_name_prefix = job_name
report_name_prefix = job_name
# This dict will contain all the information relative to each model:
# - Failures: the total, as well as the number of failures per-category defined above
# - Success: total
# - Time spent: as a comma-separated list of elapsed time
# - Failures: as a line-break separated list of errors
model_results = {
model: {
"failed": {m: {"unclassified": 0, "single": 0, "multi": 0} for m in modeling_categories},
matrix_job_results = {
matrix_name: {
"failed": {m: {"unclassified": 0, "single": 0, "multi": 0} for m in test_categories},
"success": 0,
"time_spent": "",
"failures": {},
"job_link": {},
}
for model in models
if f"{report_name_prefix}_{model}_test_reports" in available_artifacts
for matrix_name in job_matrix
if f"{report_name_prefix}_{matrix_name}_test_reports" in available_artifacts
}
unclassified_model_failures = []
for model in model_results.keys():
for artifact_path_dict in available_artifacts[f"{report_name_prefix}_{model}_test_reports"].paths:
for matrix_name in matrix_job_results.keys():
for artifact_path_dict in available_artifacts[f"{report_name_prefix}_{matrix_name}_test_reports"].paths:
path = artifact_path_dict["path"]
artifact_gpu = artifact_path_dict["gpu"]
@@ -1133,13 +1193,14 @@ if __name__ == "__main__":
if "stats" in artifact:
# Link to the GitHub Action job
job = artifact_name_to_job_map[path]
model_results[model]["job_link"][artifact_gpu] = job["html_url"]
matrix_job_results[matrix_name]["job_link"][artifact_gpu] = job["html_url"]
failed, success, time_spent = handle_test_results(artifact["stats"])
model_results[model]["success"] += success
model_results[model]["time_spent"] += time_spent[1:-1] + ", "
matrix_job_results[matrix_name]["success"] += success
matrix_job_results[matrix_name]["time_spent"] += time_spent[1:-1] + ", "
stacktraces = handle_stacktraces(artifact["failures_line"])
# TODO: ???
for line in artifact["summary_short"].split("\n"):
if line.startswith("FAILED "):
# Avoid the extra `FAILED` entry given by `run_test_using_subprocess` causing issue when calling
@@ -1150,38 +1211,45 @@ if __name__ == "__main__":
line = line[len("FAILED ") :]
line = line.split()[0].replace("\n", "")
if artifact_gpu not in model_results[model]["failures"]:
model_results[model]["failures"][artifact_gpu] = []
if artifact_gpu not in matrix_job_results[matrix_name]["failures"]:
matrix_job_results[matrix_name]["failures"][artifact_gpu] = []
trace = pop_default(stacktraces, 0, "Cannot retrieve error message.")
model_results[model]["failures"][artifact_gpu].append({"line": line, "trace": trace})
matrix_job_results[matrix_name]["failures"][artifact_gpu].append(
{"line": line, "trace": trace}
)
if re.search("test_modeling_tf_", line):
model_results[model]["failed"]["TensorFlow"][artifact_gpu] += 1
# TODO: How to deal wit this
if re.search("tests/quantization", line):
matrix_job_results[matrix_name]["failed"]["Quantization"][artifact_gpu] += 1
elif re.search("test_modeling_tf_", line):
matrix_job_results[matrix_name]["failed"]["TensorFlow"][artifact_gpu] += 1
elif re.search("test_modeling_flax_", line):
model_results[model]["failed"]["Flax"][artifact_gpu] += 1
matrix_job_results[matrix_name]["failed"]["Flax"][artifact_gpu] += 1
elif re.search("test_modeling", line):
model_results[model]["failed"]["PyTorch"][artifact_gpu] += 1
matrix_job_results[matrix_name]["failed"]["PyTorch"][artifact_gpu] += 1
elif re.search("test_tokenization", line):
model_results[model]["failed"]["Tokenizers"][artifact_gpu] += 1
matrix_job_results[matrix_name]["failed"]["Tokenizers"][artifact_gpu] += 1
elif re.search("test_pipelines", line):
model_results[model]["failed"]["Pipelines"][artifact_gpu] += 1
matrix_job_results[matrix_name]["failed"]["Pipelines"][artifact_gpu] += 1
elif re.search("test_trainer", line):
model_results[model]["failed"]["Trainer"][artifact_gpu] += 1
matrix_job_results[matrix_name]["failed"]["Trainer"][artifact_gpu] += 1
elif re.search("onnx", line):
model_results[model]["failed"]["ONNX"][artifact_gpu] += 1
matrix_job_results[matrix_name]["failed"]["ONNX"][artifact_gpu] += 1
elif re.search("auto", line):
model_results[model]["failed"]["Auto"][artifact_gpu] += 1
matrix_job_results[matrix_name]["failed"]["Auto"][artifact_gpu] += 1
else:
model_results[model]["failed"]["Unclassified"][artifact_gpu] += 1
matrix_job_results[matrix_name]["failed"]["Unclassified"][artifact_gpu] += 1
unclassified_model_failures.append(line)
# Additional runs
@@ -1315,20 +1383,10 @@ if __name__ == "__main__":
if "workflow_run" in event_payload:
is_scheduled_ci_run = event_payload["workflow_run"]["event"] == "schedule"
# The values are used as the file names where to save the corresponding CI job results.
test_to_result_name = {
"Models": "model",
"Trainer & FSDP": "trainer_and_fsdp",
"PyTorch pipelines": "torch_pipeline",
"TensorFlow pipelines": "tf_pipeline",
"Examples directory": "example",
"DeepSpeed": "deepspeed",
}
test_name_and_result_pairs = []
if len(model_results) > 0:
if len(matrix_job_results) > 0:
test_name = job_to_test_map[job_name]
test_name_and_result_pairs.append((test_name, model_results))
test_name_and_result_pairs.append((test_name, matrix_job_results))
for test_name, result in additional_results.items():
test_name_and_result_pairs.append((test_name, result))
@@ -1346,8 +1404,8 @@ if __name__ == "__main__":
)
# Let's create a file contain job --> job link
if len(model_results) > 0:
target_results = model_results
if len(matrix_job_results) > 0:
target_results = matrix_job_results
else:
target_results = additional_results[job_to_test_map[job_name]]
@@ -1360,6 +1418,8 @@ if __name__ == "__main__":
for job, job_result in sorted_dict:
if job.startswith("models_"):
job = job[len("models_") :]
elif job.startswith("quantization_"):
job = job[len("quantization_") :]
job_links[job] = job_result["job_link"]
with open(f"ci_results_{job_name}/job_links.json", "w", encoding="UTF-8") as fp:
@@ -1424,7 +1484,7 @@ if __name__ == "__main__":
message = Message(
title,
ci_title,
model_results,
matrix_job_results,
additional_results,
selected_warnings=selected_warnings,
prev_ci_artifacts=prev_ci_artifacts,