Refactor doctest (#30210)
* fix * update * fix * update * fix --------- Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -12,16 +12,13 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import collections
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from fnmatch import fnmatch
|
||||
from typing import Dict, List
|
||||
|
||||
import requests
|
||||
from get_ci_error_statistics import get_jobs
|
||||
from slack_sdk import WebClient
|
||||
|
||||
|
||||
@@ -66,9 +63,8 @@ class Message:
|
||||
def __init__(self, title: str, doc_test_results: Dict):
|
||||
self.title = title
|
||||
|
||||
self._time_spent = doc_test_results["time_spent"].split(",")[0]
|
||||
self.n_success = doc_test_results["success"]
|
||||
self.n_failures = doc_test_results["failures"]
|
||||
self.n_success = sum(job_result["n_success"] for job_result in doc_test_results.values())
|
||||
self.n_failures = sum(job_result["n_failures"] for job_result in doc_test_results.values())
|
||||
self.n_tests = self.n_success + self.n_failures
|
||||
|
||||
# Failures and success of the modeling tests
|
||||
@@ -76,7 +72,8 @@ class Message:
|
||||
|
||||
@property
|
||||
def time(self) -> str:
|
||||
time_spent = [self._time_spent]
|
||||
all_results = [*self.doc_test_results.values()]
|
||||
time_spent = [r["time_spent"].split(", ")[0] for r in all_results if len(r["time_spent"])]
|
||||
total_secs = 0
|
||||
|
||||
for time in time_spent:
|
||||
@@ -205,7 +202,7 @@ class Message:
|
||||
print(json.dumps({"blocks": json.loads(payload)}))
|
||||
|
||||
client.chat_postMessage(
|
||||
channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
|
||||
channel=SLACK_REPORT_CHANNEL_ID,
|
||||
text="There was an issue running the tests.",
|
||||
blocks=payload,
|
||||
)
|
||||
@@ -217,7 +214,7 @@ class Message:
|
||||
text = f"{self.n_failures} failures out of {self.n_tests} tests," if self.n_failures else "All tests passed."
|
||||
|
||||
self.thread_ts = client.chat_postMessage(
|
||||
channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
|
||||
channel=SLACK_REPORT_CHANNEL_ID,
|
||||
blocks=self.payload,
|
||||
text=text,
|
||||
)
|
||||
@@ -248,7 +245,7 @@ class Message:
|
||||
}
|
||||
|
||||
return [
|
||||
{"type": "header", "text": {"type": "plain_text", "text": title.upper(), "emoji": True}},
|
||||
{"type": "header", "text": {"type": "plain_text", "text": title, "emoji": True}},
|
||||
content,
|
||||
{"type": "section", "text": {"type": "mrkdwn", "text": failure_text}},
|
||||
]
|
||||
@@ -257,24 +254,19 @@ class Message:
|
||||
if self.thread_ts is None:
|
||||
raise ValueError("Can only post reply if a post has been made.")
|
||||
|
||||
job_link = self.doc_test_results.pop("job_link")
|
||||
self.doc_test_results.pop("failures")
|
||||
self.doc_test_results.pop("success")
|
||||
self.doc_test_results.pop("time_spent")
|
||||
|
||||
sorted_dict = sorted(self.doc_test_results.items(), key=lambda t: t[0])
|
||||
for job, job_result in sorted_dict:
|
||||
if len(job_result["failures"]):
|
||||
for job_name, job_result in sorted_dict:
|
||||
if len(job_result["failures"]) > 0:
|
||||
text = f"*Num failures* :{len(job_result['failed'])} \n"
|
||||
failures = job_result["failures"]
|
||||
blocks = self.get_reply_blocks(job, job_link, failures, text=text)
|
||||
blocks = self.get_reply_blocks(job_name, job_result["job_link"], failures, text=text)
|
||||
|
||||
print("Sending the following reply")
|
||||
print(json.dumps({"blocks": blocks}))
|
||||
|
||||
client.chat_postMessage(
|
||||
channel=os.environ["CI_SLACK_CHANNEL_ID_DAILY"],
|
||||
text=f"Results for {job}",
|
||||
channel=SLACK_REPORT_CHANNEL_ID,
|
||||
text=f"Results for {job_name}",
|
||||
blocks=blocks,
|
||||
thread_ts=self.thread_ts["ts"],
|
||||
)
|
||||
@@ -282,27 +274,6 @@ class Message:
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
def get_job_links():
    """Return a mapping of job name to job page URL for the current workflow run.

    The run is identified by the ``GITHUB_RUN_ID`` environment variable, and the
    jobs are read from the GitHub REST API 100 at a time. Fetching is best
    effort: if any request fails or a response lacks the expected keys, the
    error is printed and an empty dict is returned.

    Returns:
        dict: ``{job["name"]: job["html_url"]}`` for every job of the run, or
        ``{}`` if the links could not be fetched.

    Raises:
        KeyError: if ``GITHUB_RUN_ID`` is not set in the environment.
    """
    run_id = os.environ["GITHUB_RUN_ID"]
    url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{run_id}/jobs?per_page=100"
    jobs = {}

    try:
        # The first request lives inside the `try` too, so a network failure
        # degrades to "no links" instead of aborting the whole report.
        # A timeout keeps a stalled connection from hanging the CI job.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        result = response.json()
        jobs.update({job["name"]: job["html_url"] for job in result["jobs"]})

        # The first page already covered up to 100 jobs; compute how many more
        # pages are needed (zero or negative means none, `range` handles both).
        pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100)

        for i in range(pages_to_iterate_over):
            # Pages are 1-indexed and page 1 was fetched above.
            response = requests.get(url + f"&page={i + 2}", timeout=30)
            response.raise_for_status()
            result = response.json()
            jobs.update({job["name"]: job["html_url"] for job in result["jobs"]})

        return jobs
    except Exception as e:
        # Deliberately broad: job links are optional for the report.
        print("Unknown error, could not fetch links.", e)

    return {}
|
||||
|
||||
|
||||
def retrieve_artifact(name: str):
|
||||
_artifact = {}
|
||||
|
||||
@@ -344,57 +315,70 @@ def retrieve_available_artifacts():
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
github_actions_job_links = get_job_links()
|
||||
available_artifacts = retrieve_available_artifacts()
|
||||
SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"]
|
||||
|
||||
docs = collections.OrderedDict(
|
||||
[
|
||||
("*.py", "API Examples"),
|
||||
("*.md", "MD Examples"),
|
||||
]
|
||||
github_actions_jobs = get_jobs(
|
||||
workflow_run_id=os.environ["GITHUB_RUN_ID"], token=os.environ["ACCESS_REPO_INFO_TOKEN"]
|
||||
)
|
||||
|
||||
# This dict will contain all the information relative to each doc test category:
|
||||
# - failed: list of failed tests
|
||||
# - failures: dict in the format 'test': 'error_message'
|
||||
doc_test_results = {
|
||||
v: {
|
||||
"failed": [],
|
||||
"failures": {},
|
||||
}
|
||||
for v in docs.values()
|
||||
}
|
||||
artifact_name_to_job_map = {}
|
||||
for job in github_actions_jobs:
|
||||
for step in job["steps"]:
|
||||
if step["name"].startswith("Test suite reports artifacts: "):
|
||||
artifact_name = step["name"][len("Test suite reports artifacts: ") :]
|
||||
artifact_name_to_job_map[artifact_name] = job
|
||||
break
|
||||
|
||||
# Link to the GitHub Action job
|
||||
doc_test_results["job_link"] = github_actions_job_links.get("run_doctests")
|
||||
available_artifacts = retrieve_available_artifacts()
|
||||
|
||||
artifact_path = available_artifacts["doc_tests_gpu_test_reports"].paths[0]
|
||||
artifact = retrieve_artifact(artifact_path["name"])
|
||||
if "stats" in artifact:
|
||||
failed, success, time_spent = handle_test_results(artifact["stats"])
|
||||
doc_test_results["failures"] = failed
|
||||
doc_test_results["success"] = success
|
||||
doc_test_results["time_spent"] = time_spent[1:-1] + ", "
|
||||
doc_test_results = {}
|
||||
# `artifact_key` is the artifact path
|
||||
for artifact_key, artifact_obj in available_artifacts.items():
|
||||
artifact_path = artifact_obj.paths[0]
|
||||
if not artifact_path["path"].startswith("doc_tests_gpu_test_reports_"):
|
||||
continue
|
||||
|
||||
all_failures = extract_first_line_failure(artifact["failures_short"])
|
||||
for line in artifact["summary_short"].split("\n"):
|
||||
if re.search("FAILED", line):
|
||||
line = line.replace("FAILED ", "")
|
||||
line = line.split()[0].replace("\n", "")
|
||||
# change "_" back to "/" (to show the job name as path)
|
||||
job_name = artifact_path["path"].replace("doc_tests_gpu_test_reports_", "").replace("_", "/")
|
||||
|
||||
if "::" in line:
|
||||
file_path, test = line.split("::")
|
||||
else:
|
||||
file_path, test = line, line
|
||||
# This dict (one per doc test job) will contain all the information related to that job, in particular:
|
||||
# - failed: list of failed tests
|
||||
# - failures: dict in the format 'test': 'error_message'
|
||||
job_result = {}
|
||||
doc_test_results[job_name] = job_result
|
||||
|
||||
for file_regex in docs.keys():
|
||||
if fnmatch(file_path, file_regex):
|
||||
category = docs[file_regex]
|
||||
doc_test_results[category]["failed"].append(test)
|
||||
job = artifact_name_to_job_map[artifact_path["path"]]
|
||||
job_result["job_link"] = job["html_url"]
|
||||
job_result["category"] = "Python Examples" if job_name.startswith("src/") else "MD Examples"
|
||||
|
||||
failure = all_failures[test] if test in all_failures else "N/A"
|
||||
doc_test_results[category]["failures"][test] = failure
|
||||
break
|
||||
artifact = retrieve_artifact(artifact_path["path"])
|
||||
if "stats" in artifact:
|
||||
failed, success, time_spent = handle_test_results(artifact["stats"])
|
||||
job_result["n_failures"] = failed
|
||||
job_result["n_success"] = success
|
||||
job_result["time_spent"] = time_spent[1:-1] + ", "
|
||||
job_result["failed"] = []
|
||||
job_result["failures"] = {}
|
||||
|
||||
all_failures = extract_first_line_failure(artifact["failures_short"])
|
||||
for line in artifact["summary_short"].split("\n"):
|
||||
if re.search("FAILED", line):
|
||||
line = line.replace("FAILED ", "")
|
||||
line = line.split()[0].replace("\n", "")
|
||||
|
||||
if "::" in line:
|
||||
file_path, test = line.split("::")
|
||||
else:
|
||||
file_path, test = line, line
|
||||
|
||||
job_result["failed"].append(test)
|
||||
failure = all_failures[test] if test in all_failures else "N/A"
|
||||
job_result["failures"][test] = failure
|
||||
|
||||
# Save the results to disk so they can be uploaded as an artifact
|
||||
os.makedirs("doc_test_results", exist_ok=True)
|
||||
with open("doc_test_results/doc_test_results.json", "w", encoding="UTF-8") as fp:
|
||||
json.dump(doc_test_results, fp, ensure_ascii=False, indent=4)
|
||||
|
||||
message = Message("🤗 Results of the doc tests.", doc_test_results)
|
||||
message.post()
|
||||
|
||||
Reference in New Issue
Block a user