From b1375177fcc89b81495889c71dcffd9eceffa7a8 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Tue, 6 May 2025 15:10:29 +0200 Subject: [PATCH] add job links to new model failure report (#37973) * update for job link * style --------- Co-authored-by: ydshieh --- utils/notification_service.py | 22 ++++++++++++++++++++++ utils/process_bad_commit_report.py | 8 ++++++++ 2 files changed, 30 insertions(+) diff --git a/utils/notification_service.py b/utils/notification_service.py index ea622d6091..399c792c9d 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -1260,6 +1260,28 @@ if __name__ == "__main__": token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) + # Let's create a file containing the model --> job link mapping + model_job_links = {} + sorted_dict = sorted(model_results.items(), key=lambda t: t[0]) + for job, job_result in sorted_dict: + model_name = job + if model_name.startswith("models_"): + model_name = model_name[len("models_") :] + model_job_links[model_name] = job_result["job_link"] + + with open(f"ci_results_{job_name}/model_job_links.json", "w", encoding="UTF-8") as fp: + json.dump(model_job_links, fp, indent=4, ensure_ascii=False) + + # upload results to Hub dataset (only for the scheduled daily CI run on `main`) + if is_scheduled_ci_run: + api.upload_file( + path_or_fileobj=f"ci_results_{job_name}/model_job_links.json", + path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_job_links.json", + repo_id="hf-internal-testing/transformers_daily_ci", + repo_type="dataset", + token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), + ) + # Must have the same keys as in `additional_results`. # The values are used as the file names where to save the corresponding CI job results. 
test_to_result_name = { diff --git a/utils/process_bad_commit_report.py b/utils/process_bad_commit_report.py index 19812ff21f..bba03b4bd1 100644 --- a/utils/process_bad_commit_report.py +++ b/utils/process_bad_commit_report.py @@ -27,6 +27,9 @@ if __name__ == "__main__": with open("new_model_failures_with_bad_commit.json") as fp: data = json.load(fp) + with open("ci_results_run_models_gpu/model_job_links.json") as fp: + model_job_links = json.load(fp) + # TODO: extend team_members = [ "ydshieh", @@ -62,7 +65,12 @@ if __name__ == "__main__": for author, _data in new_data_full.items(): for model, model_result in _data.items(): for device, failed_tests in model_result.items(): + # prepare job_link and add it to each entry of new failed test information. + # need to change from `single-gpu` to `single` and same for `multi-gpu` to match `job_link`. + job_link = model_job_links[model][device.replace("-gpu", "")] failed_tests = [x for x in failed_tests if x["author"] == author or x["merged_by"] == author] + for x in failed_tests: + x.update({"job_link": job_link}) model_result[device] = failed_tests _data[model] = {k: v for k, v in model_result.items() if len(v) > 0} new_data_full[author] = {k: v for k, v in _data.items() if len(v) > 0}