diff --git a/utils/compare_test_runs.py b/utils/compare_test_runs.py
new file mode 100644
index 0000000000..9724c5506b
--- /dev/null
+++ b/utils/compare_test_runs.py
@@ -0,0 +1,115 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import re
+
+
+# Status lines that carry a `path:line` location; the optional `[N]` counter and the reason are dropped.
+_STATUS_WITH_LOCATION = re.compile(r"^(SKIPPED|XFAIL|XPASS|EXPECTEDFAIL)\s+\[?\d*\]?\s*(\S+:\d+)")
+# Separates a test id from its optional message on ERROR/FAILED lines.
+_MESSAGE_SEPARATOR = re.compile(r"\s+-\s+")
+
+
+def normalize_test_line(line):
+    """Reduce a summary line to the part that identifies the test.
+
+    Lines starting with SKIPPED/XFAIL/XPASS/EXPECTEDFAIL and followed by a `path:line` location become
+    `"<status> <path:line>"`. ERROR/FAILED lines are cut at the first whitespace-surrounded `-`. Any other
+    line is returned stripped but otherwise unchanged.
+    """
+    line = line.strip()
+
+    match = _STATUS_WITH_LOCATION.match(line)
+    if match:
+        status, location = match.groups()
+        return f"{status} {location}"
+
+    if line.startswith(("ERROR", "FAILED")):
+        return _MESSAGE_SEPARATOR.split(line)[0].strip()
+
+    return line
+
+
+def parse_summary_file(file_path):
+    """Return the set of normalized, non-empty lines found in the `===`-delimited parts of `file_path`.
+
+    Each line starting with `===` toggles whether the lines that follow are collected, so anything before
+    the first such line is ignored.
+    """
+    test_set = set()
+    in_summary = False
+    with open(file_path, "r", encoding="utf-8") as f:
+        for line in f:
+            stripped = line.strip()
+            if stripped.startswith("==="):
+                in_summary = not in_summary
+            elif in_summary and stripped:
+                test_set.add(normalize_test_line(stripped))
+    return test_set
+
+
+def _load_tests(file_path):
+    """Parse `file_path`; a job without an entry or whose file is missing counts as a run with no results."""
+    if not file_path or not os.path.isfile(file_path):
+        return set()
+    return parse_summary_file(file_path)
+
+
+def compare_job_sets(job_set1, job_set2):
+    """Build a text report of the summary lines that differ between two runs.
+
+    Args:
+        job_set1: mapping from job name to the summary file of the reference run.
+        job_set2: mapping from job name to the summary file of the run compared against it.
+
+    Returns:
+        The report as one string, or `"No differences found."` when no job differs.
+    """
+    report_lines = []
+
+    for job_name in sorted(set(job_set1) | set(job_set2)):
+        tests1 = _load_tests(job_set1.get(job_name))
+        tests2 = _load_tests(job_set2.get(job_name))
+
+        added = tests2 - tests1
+        removed = tests1 - tests2
+        if not (added or removed):
+            continue
+
+        report_lines.append(f"=== Diff for job: {job_name} ===")
+        if removed:
+            report_lines.append("--- Absent in current run:")
+            report_lines.extend(f" - {test}" for test in sorted(removed))
+        if added:
+            report_lines.append("+++ Appeared in current run:")
+            report_lines.extend(f" + {test}" for test in sorted(added))
+        report_lines.append("")  # blank line
+
+    return "\n".join(report_lines) if report_lines else "No differences found."
+
+
+# Example usage:
+# job_set_1 = {
+#     "albert": "prev/multi-gpu_run_models_gpu_models/albert_test_reports/summary_short.txt",
+#     "bloom": "prev/multi-gpu_run_models_gpu_models/bloom_test_reports/summary_short.txt",
+# }
+
+# job_set_2 = {
+#     "albert": "curr/multi-gpu_run_models_gpu_models/albert_test_reports/summary_short.txt",
+#     "bloom": "curr/multi-gpu_run_models_gpu_models/bloom_test_reports/summary_short.txt",
+# }
+
+# report = compare_job_sets(job_set_1, job_set_2)
+# print(report)
diff --git a/utils/get_previous_daily_ci.py b/utils/get_previous_daily_ci.py
index 2c0d1a8101..97d3696465 100644
--- a/utils/get_previous_daily_ci.py
+++ b/utils/get_previous_daily_ci.py
@@ -91,7 +91,12 @@ def get_last_daily_ci_run_commit(token, workflow_run_id=None, workflow_id=None,
 
 
 def get_last_daily_ci_artifacts(
-    artifact_names, output_dir, token, workflow_run_id=None, workflow_id=None, commit_sha=None
+    output_dir,
+    token,
+    workflow_run_id=None,
+    workflow_id=None,
+    commit_sha=None,
+    artifact_names=None,
 ):
     """Get the artifacts of last completed workflow run id of the scheduled (daily) CI."""
     workflow_run_id = get_last_daily_ci_workflow_run_id(
@@ -99,37 +104,56 @@ def get_last_daily_ci_artifacts(
     )
+    # Collected outside the `if` so that callers always get a list back, even when no workflow run is found.
+    downloaded_artifact_names = []
     if workflow_run_id is not None:
         artifacts_links = get_artifacts_links(worflow_run_id=workflow_run_id, token=token)
+
+        if artifact_names is None:
+            artifact_names = artifacts_links.keys()
+
         for artifact_name in artifact_names:
             if artifact_name in artifacts_links:
                 artifact_url = artifacts_links[artifact_name]
                 download_artifact(
                     artifact_name=artifact_name, artifact_url=artifact_url, output_dir=output_dir, token=token
                 )
+                downloaded_artifact_names.append(artifact_name)
+
+    return downloaded_artifact_names
 
 
 def get_last_daily_ci_reports(
-    artifact_names, output_dir, token, workflow_run_id=None, workflow_id=None, commit_sha=None
+    output_dir,
+    token,
+    workflow_run_id=None,
+    workflow_id=None,
+    commit_sha=None,
+    artifact_names=None,
 ):
     """Get the artifacts' content of the last completed workflow run id of the scheduled (daily) CI."""
-    get_last_daily_ci_artifacts(
-        artifact_names,
+    downloaded_artifact_names = get_last_daily_ci_artifacts(
         output_dir,
         token,
         workflow_run_id=workflow_run_id,
         workflow_id=workflow_id,
         commit_sha=commit_sha,
+        artifact_names=artifact_names,
     )
 
     results = {}
-    for artifact_name in artifact_names:
+    for artifact_name in downloaded_artifact_names:
         artifact_zip_path = os.path.join(output_dir, f"{artifact_name}.zip")
         if os.path.isfile(artifact_zip_path):
-            results[artifact_name] = {}
+            target_dir = os.path.join(output_dir, artifact_name)
             with zipfile.ZipFile(artifact_zip_path) as z:
-                for filename in z.namelist():
-                    if not os.path.isdir(filename):
-                        # read the file
-                        with z.open(filename) as f:
-                            results[artifact_name][filename] = f.read().decode("UTF-8")
+                z.extractall(target_dir)
+
+            results[artifact_name] = {}
+            for filename in os.listdir(target_dir):
+                file_path = os.path.join(target_dir, filename)
+                if not os.path.isdir(file_path):
+                    # The zip members used to be decoded as UTF-8; keep reading the extracted files that
+                    # way instead of depending on the platform's default encoding.
+                    with open(file_path, encoding="utf-8") as fp:
+                        results[artifact_name][filename] = fp.read()
 
     return results
diff --git a/utils/notification_service.py b/utils/notification_service.py
index 850ec33773..04beca72c3 100644
--- a/utils/notification_service.py
+++ b/utils/notification_service.py
@@ -24,6 +24,7 @@ import time
 from typing import Any, Optional, Union
 
 import requests
+from compare_test_runs import compare_job_sets
 from get_ci_error_statistics import get_jobs
 from get_previous_daily_ci import get_last_daily_ci_reports, get_last_daily_ci_run, get_last_daily_ci_workflow_run_id
 from huggingface_hub import HfApi
@@ -672,6 +673,21 @@ class Message:
                     }
                     blocks.append(block)
 
+        if diff_file_url is not None:
+            block = {
+                "type": "section",
+                "text": {
+                    "type": "mrkdwn",
+                    "text": f"*Test results diff*\n\n(compared to previous run: {prev_workflow_run_id})",
+                },
+                "accessory": {
+                    "type": "button",
+                    "text": {"type": "plain_text", "text": "Check test result diff file"},
+                    "url": diff_file_url,
+                },
+            }
+            blocks.append(block)
+
         if len(new_failure_blocks) > 0:
             blocks.extend(new_failure_blocks)
 
@@ -1460,13 +1476,14 @@ if __name__ == "__main__":
     prev_ci_artifacts = (None, None)
     other_ci_artifacts = []
 
+    output_dir = os.path.join(os.getcwd(), "previous_reports")
+    os.makedirs(output_dir, exist_ok=True)
+
     for idx, target_workflow_run_id in enumerate([prev_workflow_run_id] + other_workflow_run_ids):
         if target_workflow_run_id is None or target_workflow_run_id == "":
             continue
         else:
             artifact_names = [f"ci_results_{job_name}"]
-            output_dir = os.path.join(os.getcwd(), "previous_reports")
-            os.makedirs(output_dir, exist_ok=True)
             ci_artifacts = get_last_daily_ci_reports(
                 artifact_names=artifact_names,
                 output_dir=output_dir,
@@ -1478,6 +1495,42 @@ if __name__ == "__main__":
             else:
                 other_ci_artifacts.append((target_workflow_run_id, ci_artifacts))
 
+    # Always defined: `Message` checks `diff_file_url is not None` for every workflow, not only the AMD one.
+    diff_file_url = None
+    # Only for AMD at this moment.
+    # TODO: put this into a method
+    if is_amd_daily_ci_workflow:
+        if not (prev_workflow_run_id is None or prev_workflow_run_id == ""):
+            # `artifact_names=None` fetches every artifact of the previous run and extracts it under `output_dir`.
+            ci_artifacts = get_last_daily_ci_reports(
+                artifact_names=None,
+                output_dir=output_dir,
+                token=os.environ["ACCESS_REPO_INFO_TOKEN"],
+                workflow_run_id=prev_workflow_run_id,
+            )
+
+            current_artifacts = sorted(d for d in os.listdir() if os.path.isdir(d) and d.endswith("_test_reports"))
+            prev_artifacts = sorted(d for d in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, d)) and d.endswith("_test_reports"))  # fmt: skip
+
+            current_artifacts_set = {d: os.path.join(d, "summary_short.txt") for d in current_artifacts}
+            prev_artifacts_set = {d: os.path.join(output_dir, d, "summary_short.txt") for d in prev_artifacts}
+
+            report = compare_job_sets(prev_artifacts_set, current_artifacts_set)
+
+            # NOTE: the report is plain text even though the file keeps a `.json` extension.
+            with open(f"ci_results_{job_name}/test_results_diff.json", "w", encoding="utf-8") as fp:
+                fp.write(report)
+
+            # upload
+            commit_info = api.upload_file(
+                path_or_fileobj=f"ci_results_{job_name}/test_results_diff.json",
+                path_in_repo=f"{report_repo_folder}/ci_results_{job_name}/test_results_diff.json",
+                repo_id=report_repo_id,
+                repo_type="dataset",
+                token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None),
+            )
+            diff_file_url = f"https://huggingface.co/datasets/{report_repo_id}/resolve/{commit_info.oid}/{report_repo_folder}/ci_results_{job_name}/test_results_diff.json"
+
     ci_name_in_report = ""
     if job_name in job_to_test_map:
         ci_name_in_report = job_to_test_map[job_name]