Fix quantization tests (#29914)
* revert back to torch 2.1.1 * run test * switch to torch 2.2.1 * update dockerfile * fix awq tests * fix test * run quanto tests * update tests * split quantization tests * fix * fix again * final fix * fix report artifact * build docker again * Revert "build docker again" This reverts commit 399a5f9d9308da071d79034f238c719de0f3532e. * debug * revert * style * new notification system * testing notification * rebuild docker * fix_prev_ci_results * typo * remove warning * fix typo * fix artifact name * debug * issue fixed * debug again * fix * fix time * test notif with failing test * typo * issues again * final fix ? * run all quantization tests again * remove name to clear space * revert modification done on workflow * fix * build docker * build only quant docker * fix quantization ci * fix * fix report * better quantization_matrix * add print * revert to the basic one
This commit is contained in:
251
utils/notification_service_quantization.py
Normal file
251
utils/notification_service_quantization.py
Normal file
@@ -0,0 +1,251 @@
|
||||
# Copyright 2024 The HuggingFace Team. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import ast
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from typing import Dict
|
||||
|
||||
from get_ci_error_statistics import get_jobs
|
||||
from notification_service import (
|
||||
Message,
|
||||
handle_stacktraces,
|
||||
handle_test_results,
|
||||
prepare_reports,
|
||||
retrieve_artifact,
|
||||
retrieve_available_artifacts,
|
||||
)
|
||||
from slack_sdk import WebClient
|
||||
|
||||
|
||||
client = WebClient(token=os.environ["CI_SLACK_BOT_TOKEN"])
|
||||
|
||||
|
||||
class QuantizationMessage(Message):
    """Slack report for the quantization test suites.

    `results` maps a suite name to a dict with the keys read by this class:
    `"success"` (int), `"failed"` (a dict with `"single"` and `"multi"` counts),
    `"time_spent"` (a `", "`-separated string of durations) and `"failures"`
    (a dict mapping a device to a list of failure entries).

    The base `Message.__init__` is intentionally not invoked here; only the
    attributes set below are initialised. `header`, `no_failures`,
    `get_device_report` and `get_reply_blocks` are not defined in this class and
    are presumably inherited from `Message` - verify against `notification_service`.
    """

    def __init__(
        self,
        title: str,
        results: Dict,
    ):
        self.title = title

        # Failures and successes of the quantization tests, summed over every suite.
        self.n_success = sum(r["success"] for r in results.values())
        self.single_gpu_failures = sum(r["failed"]["single"] for r in results.values())
        self.multi_gpu_failures = sum(r["failed"]["multi"] for r in results.values())
        self.n_failures = self.single_gpu_failures + self.multi_gpu_failures

        self.n_tests = self.n_failures + self.n_success
        self.results = results
        # Set by `post()` to the response of the main message; `post_reply()` reads its "ts" entry.
        self.thread_ts = None

    @property
    def payload(self) -> str:
        """Return the JSON-encoded list of blocks that make up the main message."""
        blocks = [self.header]

        if self.n_failures > 0:
            blocks.append(self.failures_overwiew)
            blocks.append(self.failures_detailed)
        else:
            blocks.append(self.no_failures)

        return json.dumps(blocks)

    @property
    def time(self) -> str:
        """Return the summed duration of all suites, formatted as `<H>h<M>m<S>s`."""
        durations = []
        for result in self.results.values():
            if result["time_spent"]:
                # Drop the empty chunks produced by trailing ", " separators.
                durations.extend(chunk for chunk in result["time_spent"].split(", ") if chunk.strip())

        total_secs = 0
        # NOTE: the loop variable is deliberately not called `time`, to avoid shadowing the `time` module.
        for duration in durations:
            parts = duration.split(":")

            # Time can be formatted as xx:xx:xx, as .xx, or as x.xx if the time spent was less than a minute.
            # Left-pad with zeros so that shorter forms are read as seconds (or minutes:seconds).
            parts = ["0"] * (3 - len(parts)) + parts

            hours, minutes, seconds = int(parts[0]), int(parts[1]), float(parts[2])
            total_secs += hours * 3600 + minutes * 60 + seconds

        hours, minutes, seconds = total_secs // 3600, (total_secs % 3600) // 60, total_secs % 60
        return f"{int(hours)}h{int(minutes)}m{int(seconds)}s"

    @property
    def failures_overwiew(self) -> Dict:
        """Return the section block summarising the failure count and run time, with a link to the workflow run."""
        return {
            "type": "section",
            "text": {
                "type": "plain_text",
                "text": (
                    f"There were {self.n_failures} failures, out of {self.n_tests} tests.\n"
                    f"The suite ran in {self.time}."
                ),
                "emoji": True,
            },
            "accessory": {
                "type": "button",
                "text": {"type": "plain_text", "text": "Check Action results", "emoji": True},
                "url": f"https://github.com/huggingface/transformers/actions/runs/{os.environ['GITHUB_RUN_ID']}",
            },
        }

    @property
    def failures_detailed(self) -> Dict:
        """Return the section block listing, per suite with failures, its device report followed by its name."""
        individual_reports = []
        for suite, failed_counts in self.results_failed_items():
            device_report = self.get_device_report(failed_counts)
            # Only suites with at least one failure (on any device) are listed.
            if sum(failed_counts.values()):
                individual_reports.append(f"{device_report}{suite}")

        header = "Single | Multi | Category\n"
        failures_report = prepare_reports(
            title="The following quantization tests had failures", header=header, reports=individual_reports
        )

        return {"type": "section", "text": {"type": "mrkdwn", "text": failures_report}}

    def results_failed_items(self):
        """Return `(suite, failed_counts)` pairs, one per entry of `self.results`."""
        return [(suite, result["failed"]) for suite, result in self.results.items()]

    def post(self):
        """Print and send the main report message, storing the Slack response in `self.thread_ts`."""
        payload = self.payload
        print("Sending the following payload")
        print(json.dumps({"blocks": json.loads(payload)}))

        text = f"{self.n_failures} failures out of {self.n_tests} tests," if self.n_failures else "All tests passed."

        self.thread_ts = client.chat_postMessage(
            channel=SLACK_REPORT_CHANNEL_ID,
            blocks=payload,
            text=text,
        )

    def post_reply(self):
        """Send one threaded reply per (suite, device) pair that recorded failures.

        Raises:
            ValueError: if `post()` has not been called yet, so there is no thread to reply to.
        """
        if self.thread_ts is None:
            raise ValueError("Can only post reply if a post has been made.")

        for job, job_result in self.results.items():
            # An empty "failures" dict simply yields no iteration.
            for device, failures in job_result["failures"].items():
                blocks = self.get_reply_blocks(
                    job,
                    job_result,
                    failures,
                    device,
                    text=f'Number of failures: {job_result["failed"][device]}',
                )

                print("Sending the following reply")
                print(json.dumps({"blocks": blocks}))

                client.chat_postMessage(
                    # A threaded reply has to target the channel that holds the parent message,
                    # i.e. the same channel `post()` used, rather than a hard-coded one.
                    channel=SLACK_REPORT_CHANNEL_ID,
                    text=f"Results for {job}",
                    blocks=blocks,
                    thread_ts=self.thread_ts["ts"],
                )
                # Pause between replies (presumably to stay under Slack rate limits - confirm).
                time.sleep(1)
|
||||
|
||||
|
||||
def parse_quantization_matrix(raw: str) -> list:
    """Parse the quantization matrix passed on the command line.

    Args:
        raw: A Python literal (expected to be a list of strings) such as
            `"['quantization/bnb', 'quantization/awq']"`.

    Returns:
        The list with every `quantization/` replaced by `quantization_`, which is the
        form used in the artifact names.

    Raises:
        SyntaxError, ValueError: propagated from `ast.literal_eval` when `raw` is not a valid literal.
    """
    matrix = ast.literal_eval(raw)
    # Need to change from elements like `quantization/bnb` to `quantization_bnb` (the ones used as artifact names).
    return [name.replace("quantization/", "quantization_") for name in matrix]


if __name__ == "__main__":
    setup_status = os.environ.get("SETUP_STATUS")
    SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"]
    # The setup is considered failed only when a status was reported and it is not "success".
    setup_failed = setup_status is not None and setup_status != "success"

    # This env. variable is set in workflow file (under the job `send_results`).
    ci_event = os.environ["CI_EVENT"]

    title = f"🤗 Results of the {ci_event} tests."

    if setup_failed:
        Message.error_out(
            title, ci_title="", runner_not_available=False, runner_failed=False, setup_failed=setup_failed
        )
        # `sys.exit` rather than the `site`-provided `exit`, which is not guaranteed to exist.
        sys.exit(0)

    arguments = sys.argv[1]
    try:
        quantization_matrix = parse_quantization_matrix(arguments)
    except (SyntaxError, ValueError) as err:
        # `ast.literal_eval` raises either exception on malformed input; report both to Slack before failing.
        Message.error_out(title, ci_title="")
        raise ValueError("Errored out.") from err

    available_artifacts = retrieve_available_artifacts()

    # One result skeleton per quantization suite for which a report artifact is available.
    quantization_results = {
        quant: {
            "failed": {"single": 0, "multi": 0},
            "success": 0,
            "time_spent": "",
            "failures": {},
            "job_link": {},
        }
        for quant in quantization_matrix
        if f"run_tests_quantization_torch_gpu_{quant}" in available_artifacts
    }

    github_actions_jobs = get_jobs(
        workflow_run_id=os.environ["GITHUB_RUN_ID"], token=os.environ["ACCESS_REPO_INFO_TOKEN"]
    )
    github_actions_job_links = {job["name"]: job["html_url"] for job in github_actions_jobs}

    # Map each artifact name to its job, using the name of the job step that reports the artifact.
    artifact_step_prefix = "Test suite reports artifacts: "
    artifact_name_to_job_map = {}
    for job in github_actions_jobs:
        for step in job["steps"]:
            if step["name"].startswith(artifact_step_prefix):
                artifact_name = step["name"][len(artifact_step_prefix) :]
                artifact_name_to_job_map[artifact_name] = job
                break

    for quant, quant_result in quantization_results.items():
        for artifact_path in available_artifacts[f"run_tests_quantization_torch_gpu_{quant}"].paths:
            gpu = artifact_path["gpu"]
            artifact = retrieve_artifact(artifact_path["path"], gpu)
            if "stats" not in artifact:
                continue

            # Link to the GitHub Action job
            job = artifact_name_to_job_map[artifact_path["path"]]
            quant_result["job_link"][gpu] = job["html_url"]

            failed, success, time_spent = handle_test_results(artifact["stats"])
            quant_result["failed"][gpu] += failed
            quant_result["success"] += success
            # Drops the first and last character of the reported duration
            # (presumably surrounding parentheses - confirm against `handle_test_results`).
            quant_result["time_spent"] += time_spent[1:-1] + ", "

            stacktraces = handle_stacktraces(artifact["failures_line"])

            for line in artifact["summary_short"].split("\n"):
                if line.startswith("FAILED "):
                    line = line[len("FAILED ") :]
                    line = line.split()[0].replace("\n", "")

                    # Stack traces are consumed in order, one per "FAILED" summary line.
                    quant_result["failures"].setdefault(gpu, []).append(
                        {"line": line, "trace": stacktraces.pop(0)}
                    )

    message = QuantizationMessage(
        title,
        results=quantization_results,
    )

    message.post()
    message.post_reply()
|
||||
Reference in New Issue
Block a user