From 1a1893e5d820ea811cc329326595e7fba108e1ea Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Thu, 29 Sep 2022 19:22:23 +0200 Subject: [PATCH] Update Past CI report script (#19228) * Simplify the error report * Add status placeholder * Add job links Co-authored-by: ydshieh --- utils/get_ci_error_statistics.py | 81 +++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/utils/get_ci_error_statistics.py b/utils/get_ci_error_statistics.py index 9d0beeaaca..790ec5e3d5 100644 --- a/utils/get_ci_error_statistics.py +++ b/utils/get_ci_error_statistics.py @@ -10,6 +10,28 @@ from collections import Counter import requests +def get_job_links(workflow_run_id): + """Extract job names and their job links in a GitHub Actions workflow run""" + + url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}/jobs?per_page=100" + result = requests.get(url).json() + job_links = {} + + try: + job_links.update({job["name"]: job["html_url"] for job in result["jobs"]}) + pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100) + + for i in range(pages_to_iterate_over): + result = requests.get(url + f"&page={i + 2}").json() + job_links.update({job["name"]: job["html_url"] for job in result["jobs"]}) + + return job_links + except Exception as e: + print("Unknown error, could not fetch links.", e) + + return {} + + def get_artifacts_links(worflow_run_id): """Get all artifact links from a workflow run""" @@ -54,16 +76,17 @@ def download_artifact(artifact_name, artifact_url, output_dir, token): break -def get_errors_from_single_artifact(artifact_zip_path): +def get_errors_from_single_artifact(artifact_zip_path, job_links=None): """Extract errors from a downloaded artifact (in .zip format)""" errors = [] failed_tests = [] + job_name = None with zipfile.ZipFile(artifact_zip_path) as z: for filename in z.namelist(): if not os.path.isdir(filename): # read the file - if filename in ["failures_line.txt", "summary_short.txt"]: + if filename in ["failures_line.txt", "summary_short.txt", "job_name.txt"]: with z.open(filename) as f: for line in f: line = line.decode("UTF-8").strip() @@ -80,6 +103,8 @@ def get_errors_from_single_artifact(artifact_zip_path): # `test` is the test method that failed test = line[len("FAILED ") :] failed_tests.append(test) + elif filename == "job_name.txt": + job_name = line if len(errors) != len(failed_tests): raise ValueError( @@ -88,23 +113,26 @@ def get_errors_from_single_artifact(artifact_zip_path): " problem." ) - return errors, failed_tests + job_link = None + if job_name and job_links: + job_link = job_links.get(job_name, None) + + # A list with elements of the form (line of error, error, failed test) + result = [x + [y] + [job_link] for x, y in zip(errors, failed_tests)] + + return result -def get_all_errors(artifact_dir): +def get_all_errors(artifact_dir, job_links=None): """Extract errors from all artifact files""" errors = [] - failed_tests = [] paths = [os.path.join(artifact_dir, p) for p in os.listdir(artifact_dir) if p.endswith(".zip")] - for p in paths: - _errors, _failed_tests = get_errors_from_single_artifact(p) - errors.extend(_errors) - failed_tests.extend(_failed_tests) + errors.extend(get_errors_from_single_artifact(p, job_links=job_links)) - return errors, failed_tests + return errors def reduce_by_error(logs, error_filter=None): @@ -156,12 +184,12 @@ def reduce_by_model(logs, error_filter=None): def make_github_table(reduced_by_error): - header = "| no. | error |" - sep = "|-:|:-|" + header = "| no. | error | status |" + sep = "|-:|:-|:-|" lines = [header, sep] for error in reduced_by_error: count = reduced_by_error[error]["count"] - line = f"| {count} | {error[:100]} |" + line = f"| {count} | {error[:100]} | |" lines.append(line) return "\n".join(lines) @@ -201,6 +229,20 @@ if __name__ == "__main__": os.makedirs(args.output_dir, exist_ok=True) + _job_links = get_job_links(args.workflow_run_id) + job_links = {} + # To deal with `workflow_call` event, where a job name is the combination of the job names in the caller and callee. + # For example, `PyTorch 1.11 / Model tests (models/albert, single-gpu)`. + if _job_links: + for k, v in _job_links.items(): + # This is how GitHub actions combine job names. + if " / " in k: + index = k.find(" / ") + k = k[index + len(" / ") :] + job_links[k] = v + with open(os.path.join(args.output_dir, "job_links.json"), "w", encoding="UTF-8") as fp: + json.dump(job_links, fp, ensure_ascii=False, indent=4) + artifacts = get_artifacts_links(args.workflow_run_id) with open(os.path.join(args.output_dir, "artifacts.json"), "w", encoding="UTF-8") as fp: json.dump(artifacts, fp, ensure_ascii=False, indent=4) @@ -210,8 +252,9 @@ if __name__ == "__main__": # Be gentle to GitHub time.sleep(1) - errors, failed_tests = get_all_errors(args.output_dir) + errors = get_all_errors(args.output_dir, job_links=job_links) + # `e[1]` is the error counter = Counter() counter.update([e[1] for e in errors]) @@ -223,14 +266,8 @@ if __name__ == "__main__": with open(os.path.join(args.output_dir, "errors.json"), "w", encoding="UTF-8") as fp: json.dump(errors, fp, ensure_ascii=False, indent=4) - with open(os.path.join(args.output_dir, "failed_tests.json"), "w", encoding="UTF-8") as fp: - json.dump(failed_tests, fp, ensure_ascii=False, indent=4) - - # Produce tables for GitHub issue. - logs = [(error_line, error, failed_test) for (error_line, error), failed_test in zip(errors, failed_tests)] - - reduced_by_error = reduce_by_error(logs) - reduced_by_model = reduce_by_model(logs) + reduced_by_error = reduce_by_error(errors) + reduced_by_model = reduce_by_model(errors) s1 = make_github_table(reduced_by_error) s2 = make_github_table_per_model(reduced_by_model)