new failure CI reports for all jobs (#38298)

* new failures

* report_repo_id

* report_repo_id

* report_repo_id

* More fixes

* More fixes

* More fixes

* ruff

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2025-05-24 19:15:02 +02:00
committed by GitHub
parent 31f8a0fe8a
commit d0c9c66d1c
11 changed files with 248 additions and 121 deletions

View File

@@ -39,13 +39,16 @@ import os
import subprocess
result = subprocess.run(
["python3", "-m", "pytest", "-v", f"{target_test}"],
["python3", "-m", "pytest", "-v", "-rfEp", f"{target_test}"],
capture_output = True,
text=True,
)
print(result.stdout)
if len(result.stderr) > 0:
if f"PASSED {target_test}" in result.stdout:
print("test passed")
exit(0)
elif len(result.stderr) > 0:
if "ERROR: file or directory not found: " in result.stderr:
print("test file or directory not found in this commit")
exit(0)

View File

@@ -28,11 +28,15 @@ def get_daily_ci_runs(token, num_runs=7, workflow_id=None):
url = f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs"
# On `main` branch + event being `schedule` + not returning PRs + only `num_runs` results
url += f"?branch=main&event=schedule&exclude_pull_requests=true&per_page={num_runs}"
url += f"?branch=main&exclude_pull_requests=true&per_page={num_runs}"
result = requests.get(url, headers=headers).json()
result = requests.get(f"{url}&event=schedule", headers=headers).json()
workflow_runs = result["workflow_runs"]
if len(workflow_runs) == 0:
result = requests.get(f"{url}&event=workflow_run", headers=headers).json()
workflow_runs = result["workflow_runs"]
return result["workflow_runs"]
return workflow_runs
def get_last_daily_ci_run(token, workflow_run_id=None, workflow_id=None, commit_sha=None):

View File

@@ -30,8 +30,17 @@ from huggingface_hub import HfApi
from slack_sdk import WebClient
api = HfApi()
client = WebClient(token=os.environ["CI_SLACK_BOT_TOKEN"])
# A map associating the job names (specified by `inputs.job` in a workflow file) with the keys of
# `additional_files`. This is used to remove some entries in `additional_files` that are not concerned by a
# specific job. See below.
job_to_test_map = {
"run_models_gpu": "Models",
"run_trainer_and_fsdp_gpu": "Trainer & FSDP",
"run_pipelines_torch_gpu": "PyTorch pipelines",
"run_pipelines_tf_gpu": "TensorFlow pipelines",
"run_examples_gpu": "Examples directory",
"run_torch_cuda_extensions_gpu": "DeepSpeed",
}
NON_MODEL_TEST_MODULES = [
"deepspeed",
@@ -516,6 +525,7 @@ class Message:
if len(self.selected_warnings) > 0:
blocks.append(self.warnings)
new_failure_blocks = []
for idx, (prev_workflow_run_id, prev_ci_artifacts) in enumerate(
[self.prev_ci_artifacts] + self.other_ci_artifacts
):
@@ -524,13 +534,11 @@ class Message:
new_failure_blocks = self.get_new_model_failure_blocks(
prev_ci_artifacts=prev_ci_artifacts, with_header=False
)
if len(new_failure_blocks) > 0:
blocks.extend(new_failure_blocks)
# To save the list of new model failures and uploaed to hub repositories
extra_blocks = self.get_new_model_failure_blocks(prev_ci_artifacts=prev_ci_artifacts, to_truncate=False)
if extra_blocks:
filename = "new_model_failures"
filename = "new_failures"
if idx > 0:
filename = f"{filename}_against_{prev_workflow_run_id}"
@@ -541,17 +549,17 @@ class Message:
# upload results to Hub dataset
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/{filename}.txt")
commit_info = api.upload_file(
_ = api.upload_file(
path_or_fileobj=file_path,
path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/{filename}.txt",
repo_id="hf-internal-testing/transformers_daily_ci",
repo_id=report_repo_id,
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{report_repo_folder}/ci_results_{job_name}/{filename}.txt"
# extra processing to save to json format
new_failed_tests = {}
nb_new_failed_tests = 0
for line in failure_text.split():
if "https://github.com/huggingface/transformers/actions/runs" in line:
pattern = r"<(https://github.com/huggingface/transformers/actions/runs/.+?/job/.+?)\|(.+?)>"
@@ -563,36 +571,56 @@ class Message:
model = line.split("/")[1]
if model not in new_failed_tests:
new_failed_tests[model] = {"single-gpu": [], "multi-gpu": []}
for url, device in items:
for _, device in items:
new_failed_tests[model][f"{device}-gpu"].append(line)
nb_new_failed_tests += 1
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/{filename}.json")
with open(file_path, "w", encoding="UTF-8") as fp:
json.dump(new_failed_tests, fp, ensure_ascii=False, indent=4)
# upload results to Hub dataset
file_path = os.path.join(os.getcwd(), f"ci_results_{job_name}/{filename}.json")
_ = api.upload_file(
commit_info = api.upload_file(
path_or_fileobj=file_path,
path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/{filename}.json",
repo_id="hf-internal-testing/transformers_daily_ci",
repo_id=report_repo_id,
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
new_failures_url = f"https://huggingface.co/datasets/{report_repo_id}/raw/{commit_info.oid}/{report_repo_folder}/ci_results_{job_name}/{filename}.json"
if idx == 0:
block = {
"type": "section",
"text": {
"type": "plain_text",
"text": " ",
"type": "mrkdwn",
"text": f"*There are {nb_new_failed_tests} new failed tests*\n\n(compared to previous run: <https://github.com/huggingface/transformers/actions/runs/{prev_workflow_run_id}|{prev_workflow_run_id}>)",
},
"accessory": {
"type": "button",
"text": {"type": "plain_text", "text": "Check New model failures"},
"url": url,
"text": {"type": "plain_text", "text": "Check new failures"},
"url": new_failures_url,
},
}
blocks.append(block)
else:
block = {
"type": "section",
"text": {
"type": "mrkdwn",
# TODO: We should NOT assume it's always Nvidia CI, but it's the case at this moment.
"text": f"*There are {nb_new_failed_tests} failed tests unique to this run*\n\n(compared to Nvidia CI: <https://github.com/huggingface/transformers/actions/runs/{prev_workflow_run_id}|{prev_workflow_run_id}>)",
},
"accessory": {
"type": "button",
"text": {"type": "plain_text", "text": "Check failures"},
"url": new_failures_url,
},
}
blocks.append(block)
if len(new_failure_blocks) > 0:
blocks.extend(new_failure_blocks)
return json.dumps(blocks)
@@ -717,14 +745,28 @@ class Message:
if prev_ci_artifacts is None:
return []
sorted_dict = sorted(self.model_results.items(), key=lambda t: t[0])
if len(self.model_results) > 0:
target_results = self.model_results
else:
target_results = self.additional_results[job_to_test_map[job_name]]
# Make the format uniform between `model_results` and `additional_results[XXX]`
if "failures" in target_results:
target_results = {job_name: target_results}
sorted_dict = sorted(target_results.items(), key=lambda t: t[0])
job = job_to_test_map[job_name]
prev_model_results = {}
if (
f"ci_results_{job_name}" in prev_ci_artifacts
and "model_results.json" in prev_ci_artifacts[f"ci_results_{job_name}"]
and f"{test_to_result_name[job]}_results.json" in prev_ci_artifacts[f"ci_results_{job_name}"]
):
prev_model_results = json.loads(prev_ci_artifacts[f"ci_results_{job_name}"]["model_results.json"])
prev_model_results = json.loads(
prev_ci_artifacts[f"ci_results_{job_name}"][f"{test_to_result_name[job]}_results.json"]
)
# Make the format uniform between `model_results` and `additional_results[XXX]`
if "failures" in prev_model_results:
prev_model_results = {job_name: prev_model_results}
all_failure_lines = {}
for job, job_result in sorted_dict:
@@ -751,7 +793,7 @@ class Message:
all_failure_lines[new_text].append(f"<{url}|{device}>" if url is not None else device)
MAX_ERROR_TEXT = 3000 - len("[Truncated]") - len("```New model failures```\n\n")
MAX_ERROR_TEXT = 3000 - len("[Truncated]") - len("```New failures```\n\n")
if not to_truncate:
MAX_ERROR_TEXT = float("inf")
failure_text = ""
@@ -768,10 +810,10 @@ class Message:
if failure_text:
if with_header:
blocks.append(
{"type": "header", "text": {"type": "plain_text", "text": "New model failures", "emoji": True}}
{"type": "header", "text": {"type": "plain_text", "text": "New failures", "emoji": True}}
)
else:
failure_text = f"*New model failures*\n\n{failure_text}"
failure_text = f"{failure_text}"
blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": failure_text}})
return blocks
@@ -927,6 +969,9 @@ def pop_default(l: list[Any], i: int, default: Any) -> Any:
if __name__ == "__main__":
api = HfApi()
client = WebClient(token=os.environ["CI_SLACK_BOT_TOKEN"])
SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"]
# runner_status = os.environ.get("RUNNER_STATUS")
@@ -1157,15 +1202,7 @@ if __name__ == "__main__":
elif ci_event.startswith("Push CI (AMD)"):
additional_files = {}
# A map associating the job names (specified by `inputs.job` in a workflow file) with the keys of
# `additional_files`. This is used to remove some entries in `additional_files` that are not concerned by a
# specific job. See below.
job_to_test_map = {
"run_pipelines_torch_gpu": "PyTorch pipelines",
"run_pipelines_tf_gpu": "TensorFlow pipelines",
"run_examples_gpu": "Examples directory",
"run_torch_cuda_extensions_gpu": "DeepSpeed",
}
report_repo_id = os.getenv("REPORT_REPO_ID")
# if it is not a scheduled run, upload the reports to a subfolder under `report_repo_folder`
report_repo_subfolder = ""
@@ -1258,81 +1295,100 @@ if __name__ == "__main__":
os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}"))
nvidia_daily_ci_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml"
amd_daily_ci_workflows = (
"huggingface/transformers/.github/workflows/self-scheduled-amd-mi210-caller.yml",
"huggingface/transformers/.github/workflows/self-scheduled-amd-mi250-caller.yml",
)
is_nvidia_daily_ci_workflow = os.environ.get("GITHUB_WORKFLOW_REF").startswith(nvidia_daily_ci_workflow)
is_amd_daily_ci_workflow = os.environ.get("GITHUB_WORKFLOW_REF").startswith(amd_daily_ci_workflows)
is_scheduled_ci_run = os.environ.get("GITHUB_EVENT_NAME") == "schedule"
# For AMD workflow runs: the different AMD CI callers (MI210/MI250/MI300, etc.) are triggered by `workflow_run`
# event of `.github/workflows/self-scheduled-amd-caller.yml`.
if is_amd_daily_ci_workflow:
# Get the path to the file on the runner that contains the full event webhook payload.
event_payload_path = os.environ.get("GITHUB_EVENT_PATH")
# Load the event payload
with open(event_payload_path) as fp:
event_payload = json.load(fp)
# The event that triggers the `workflow_run` event.
if "workflow_run" in event_payload:
is_scheduled_ci_run = event_payload["event"] == "schedule"
# Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as
# results.
if job_name == "run_models_gpu":
with open(f"ci_results_{job_name}/model_results.json", "w", encoding="UTF-8") as fp:
json.dump(model_results, fp, indent=4, ensure_ascii=False)
api.upload_file(
path_or_fileobj=f"ci_results_{job_name}/model_results.json",
path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/model_results.json",
repo_id="hf-internal-testing/transformers_daily_ci",
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
# Let's create a file contain job --> job link
model_job_links = {}
sorted_dict = sorted(model_results.items(), key=lambda t: t[0])
for job, job_result in sorted_dict:
model_name = job
if model_name.startswith("models_"):
model_name = model_name[len("models_") :]
model_job_links[model_name] = job_result["job_link"]
with open(f"ci_results_{job_name}/model_job_links.json", "w", encoding="UTF-8") as fp:
json.dump(model_job_links, fp, indent=4, ensure_ascii=False)
api.upload_file(
path_or_fileobj=f"ci_results_{job_name}/model_job_links.json",
path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/model_job_links.json",
repo_id="hf-internal-testing/transformers_daily_ci",
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
# Must have the same keys as in `additional_results`.
# The values are used as the file names where to save the corresponding CI job results.
test_to_result_name = {
"Models": "model",
"Trainer & FSDP": "trainer_and_fsdp",
"PyTorch pipelines": "torch_pipeline",
"TensorFlow pipelines": "tf_pipeline",
"Examples directory": "example",
"DeepSpeed": "deepspeed",
}
for job, job_result in additional_results.items():
with open(f"ci_results_{job_name}/{test_to_result_name[job]}_results.json", "w", encoding="UTF-8") as fp:
json.dump(job_result, fp, indent=4, ensure_ascii=False)
test_name_and_result_pairs = []
if len(model_results) > 0:
test_name = job_to_test_map[job_name]
test_name_and_result_pairs.append((test_name, model_results))
for test_name, result in additional_results.items():
test_name_and_result_pairs.append((test_name, result))
for test_name, result in test_name_and_result_pairs:
with open(f"ci_results_{job_name}/{test_to_result_name[test_name]}_results.json", "w", encoding="UTF-8") as fp:
json.dump(result, fp, indent=4, ensure_ascii=False)
api.upload_file(
path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json",
path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/{test_to_result_name[job]}_results.json",
repo_id="hf-internal-testing/transformers_daily_ci",
path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[test_name]}_results.json",
path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/{test_to_result_name[test_name]}_results.json",
repo_id=report_repo_id,
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
# Let's create a file contain job --> job link
if len(model_results) > 0:
target_results = model_results
else:
target_results = additional_results[job_to_test_map[job_name]]
# Make the format uniform between `model_results` and `additional_results[XXX]`
if "failures" in target_results:
target_results = {job_name: target_results}
job_links = {}
sorted_dict = sorted(target_results.items(), key=lambda t: t[0])
for job, job_result in sorted_dict:
if job.startswith("models_"):
job = job[len("models_") :]
job_links[job] = job_result["job_link"]
with open(f"ci_results_{job_name}/job_links.json", "w", encoding="UTF-8") as fp:
json.dump(job_links, fp, indent=4, ensure_ascii=False)
api.upload_file(
path_or_fileobj=f"ci_results_{job_name}/job_links.json",
path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/job_links.json",
repo_id=report_repo_id,
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
prev_workflow_run_id = None
other_workflow_run_ids = []
if is_scheduled_ci_run:
# TODO: remove `if job_name == "run_models_gpu"`
if job_name == "run_models_gpu":
prev_workflow_run_id = get_last_daily_ci_workflow_run_id(
token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=workflow_id
prev_workflow_run_id = get_last_daily_ci_workflow_run_id(
token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=workflow_id
)
# For a scheduled run that is not the Nvidia's scheduled daily CI, add Nvidia's scheduled daily CI run as a target to compare.
if not is_nvidia_daily_ci_workflow:
# The id of the workflow `.github/workflows/self-scheduled-caller.yml` (not of a workflow run of it).
other_workflow_id = "90575235"
# We need to get the Nvidia's scheduled daily CI run that match the current run (i.e. run with the same commit SHA)
other_workflow_run_id = get_last_daily_ci_workflow_run_id(
token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=other_workflow_id, commit_sha=ci_sha
)
# For a scheduled run that is not the Nvidia's scheduled daily CI, add Nvidia's scheduled daily CI run as a target to compare.
if not is_nvidia_daily_ci_workflow:
# The id of the workflow `.github/workflows/self-scheduled-caller.yml` (not of a workflow run of it).
other_workflow_id = "90575235"
# We need to get the Nvidia's scheduled daily CI run that match the current run (i.e. run with the same commit SHA)
other_workflow_run_id = get_last_daily_ci_workflow_run_id(
token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=other_workflow_id, commit_sha=ci_sha
)
other_workflow_run_ids.append(other_workflow_run_id)
other_workflow_run_ids.append(other_workflow_run_id)
else:
prev_workflow_run_id = os.environ["PREV_WORKFLOW_RUN_ID"]
other_workflow_run_id = os.environ["OTHER_WORKFLOW_RUN_ID"]
@@ -1359,13 +1415,6 @@ if __name__ == "__main__":
else:
other_ci_artifacts.append((target_workflow_run_id, ci_artifacts))
job_to_test_map.update(
{
"run_models_gpu": "Models",
"run_trainer_and_fsdp_gpu": "Trainer & FSDP",
}
)
ci_name_in_report = ""
if job_name in job_to_test_map:
ci_name_in_report = job_to_test_map[job_name]

View File

@@ -274,11 +274,13 @@ if __name__ == "__main__":
with open(f"ci_results_{job_name}/quantization_results.json", "w", encoding="UTF-8") as fp:
json.dump(quantization_results, fp, indent=4, ensure_ascii=False)
report_repo_id = os.getenv("REPORT_REPO_ID")
# upload results to Hub dataset (only for the scheduled daily CI run on `main`)
api.upload_file(
path_or_fileobj=f"ci_results_{job_name}/quantization_results.json",
path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/quantization_results.json",
repo_id="hf-internal-testing/transformers_daily_ci",
repo_id=report_repo_id,
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)

View File

@@ -1,4 +1,4 @@
"""An internal script to process `new_model_failures_with_bad_commit.json` produced by `utils/check_bad_commit.py`.
"""An internal script to process `new_failures_with_bad_commit.json` produced by `utils/check_bad_commit.py`.
This is used by `.github/workflows/check_failed_model_tests.yml` to produce a slack report of the following form
@@ -24,11 +24,13 @@ from huggingface_hub import HfApi
if __name__ == "__main__":
api = HfApi()
with open("new_model_failures_with_bad_commit.json") as fp:
job_name = os.environ.get("JOB_NAME")
with open("new_failures_with_bad_commit.json") as fp:
data = json.load(fp)
with open("ci_results_run_models_gpu/model_job_links.json") as fp:
model_job_links = json.load(fp)
with open(f"ci_results_{job_name}/job_links.json") as fp:
job_links = json.load(fp)
# TODO: extend
team_members = [
@@ -67,7 +69,11 @@ if __name__ == "__main__":
for device, failed_tests in model_result.items():
# prepare job_link and add it to each entry of new failed test information.
# need to change from `single-gpu` to `single` and same for `multi-gpu` to match `job_link`.
job_link = model_job_links[model][device.replace("-gpu", "")]
key = model
if list(job_links.keys()) == [job_name]:
key = job_name
job_link = job_links[key][device.replace("-gpu", "")]
failed_tests = [x for x in failed_tests if x["author"] == author or x["merged_by"] == author]
for x in failed_tests:
x.update({"job_link": job_link})
@@ -92,16 +98,18 @@ if __name__ == "__main__":
if report_repo_subfolder:
report_repo_folder = f"{report_repo_folder}/{report_repo_subfolder}"
with open("new_model_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
report_repo_id = os.getenv("REPORT_REPO_ID")
with open("new_failures_with_bad_commit_grouped_by_authors.json", "w") as fp:
json.dump(new_data_full, fp, ensure_ascii=False, indent=4)
commit_info = api.upload_file(
path_or_fileobj="new_model_failures_with_bad_commit_grouped_by_authors.json",
path_in_repo=f"{report_repo_folder}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json",
repo_id="hf-internal-testing/transformers_daily_ci",
path_or_fileobj="new_failures_with_bad_commit_grouped_by_authors.json",
path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/new_failures_with_bad_commit_grouped_by_authors.json",
repo_id=report_repo_id,
repo_type="dataset",
token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
)
url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{report_repo_folder}/ci_results_run_models_gpu/new_model_failures_with_bad_commit_grouped_by_authors.json"
url = f"https://huggingface.co/datasets/{report_repo_id}/raw/{commit_info.oid}/{report_repo_folder}/ci_results_{job_name}/new_failures_with_bad_commit_grouped_by_authors.json"
# Add `GH_` prefix as keyword mention
output = {}