From 367026000bbe9957f95eb1eb7d9649d78ac0b468 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Mon, 12 Sep 2022 15:20:31 +0200
Subject: [PATCH] create Past CI results as tables for GitHub issue (#18953)

* create Past CI results as tables for GitHub issue

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
---
 utils/get_ci_error_statistics.py | 87 ++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/utils/get_ci_error_statistics.py b/utils/get_ci_error_statistics.py
index bca425663b..9d0beeaaca 100644
--- a/utils/get_ci_error_statistics.py
+++ b/utils/get_ci_error_statistics.py
@@ -107,6 +107,79 @@ def get_all_errors(artifact_dir):
     return errors, failed_tests
 
 
+def reduce_by_error(logs, error_filter=None):
+    """Group `logs` by error message and count each error.
+
+    `logs` holds `(error_line, error, failed_test)` tuples; errors found in `error_filter` are dropped.
+    Returns `{error: {"count": int, "failed_tests": [(failed_test, error_line), ...]}}`, most frequent first.
+    """
+    grouped = {}  # one pass over `logs` instead of rescanning it for every distinct error
+    for error_line, error, failed_test in logs:
+        if error_filter is None or error not in error_filter:
+            grouped.setdefault(error, []).append((failed_test, error_line))
+
+    ranked = sorted(grouped.items(), key=lambda item: len(item[1]), reverse=True)  # stable on ties
+    return {error: {"count": len(tests), "failed_tests": tests} for error, tests in ranked}
+
+
+def get_model(test):
+    """Return the model directory name of a test id, or `None` for a test outside `tests/models/`.
+
+    Only the file path part of `test` (the text before the first `::`) is inspected.
+    """
+    test_file = test.split("::")[0]
+    if not test_file.startswith("tests/models/"):
+        return None
+    return test_file.split("/")[2]
+
+
+def reduce_by_model(logs, error_filter=None):
+    """Count errors per model, keeping only tests located under `tests/models/`.
+
+    `logs` holds `(error_line, error, failed_test)` tuples; errors found in `error_filter` are dropped.
+    Returns `{model: {"count": int, "errors": {error: count}}}` where models are ordered by their total
+    number of errors and each model's errors by their count, both most frequent first.
+    """
+    per_model = {}  # a dict (not a set) keeps first-seen order, so ties are ordered the same on every run
+    for _, error, failed_test in logs:
+        model = get_model(failed_test)
+        if model is not None and (error_filter is None or error not in error_filter):
+            per_model.setdefault(model, Counter())[error] += 1
+
+    r = {}
+    for model, counter in per_model.items():
+        error_counts = dict(counter.most_common())
+        r[model] = {"count": sum(error_counts.values()), "errors": error_counts}
+
+    r = dict(sorted(r.items(), key=lambda item: item[1]["count"], reverse=True))
+    return r
+
+
+def make_github_table(reduced_by_error):
+    """Render the result of `reduce_by_error` as a GitHub Markdown table, one row per error.
+
+    Errors are cut to 100 characters; `|` and line breaks are neutralized so every row stays a single row.
+    """
+    lines = ["| no. | error |", "|-:|:-|"]
+    for error, info in reduced_by_error.items():
+        cell = error[:100].replace("|", "\\|").replace("\n", " ")  # truncate first, then escape
+        lines.append(f"| {info['count']} | {cell} |")
+    return "\n".join(lines)
+
+
+def make_github_table_per_model(reduced_by_model):
+    """Render the result of `reduce_by_model` as a GitHub Markdown table, one row per model.
+
+    Rows hold the model, its error total and its most frequent error (60 chars max, `|`/line breaks escaped).
+    """
+    lines = ["| model | no. of errors | major error | count |", "|-:|-:|-:|-:|"]
+    for model, info in reduced_by_model.items():
+        error, n = max(info["errors"].items(), key=lambda item: item[1])  # first entry wins on ties
+        cell = error[:60].replace("|", "\\|").replace("\n", " ")  # truncate first, then escape
+        lines.append(f"| {model} | {info['count']} | {cell} | {n} |")
+    return "\n".join(lines)
+
+
 if __name__ == "__main__":
 
     parser = argparse.ArgumentParser()
@@ -152,3 +225,17 @@ if __name__ == "__main__":
 
     with open(os.path.join(args.output_dir, "failed_tests.json"), "w", encoding="UTF-8") as fp:
         json.dump(failed_tests, fp, ensure_ascii=False, indent=4)
+
+    # Produce tables for GitHub issue.
+    logs = [(error_line, error, failed_test) for (error_line, error), failed_test in zip(errors, failed_tests)]
+
+    reduced_by_error = reduce_by_error(logs)
+    reduced_by_model = reduce_by_model(logs)
+
+    s1 = make_github_table(reduced_by_error)
+    s2 = make_github_table_per_model(reduced_by_model)
+
+    with open(os.path.join(args.output_dir, "reduced_by_error.txt"), "w", encoding="UTF-8") as fp:
+        fp.write(s1)
+    with open(os.path.join(args.output_dir, "reduced_by_model.txt"), "w", encoding="UTF-8") as fp:
+        fp.write(s2)