Make Slack CI reporting stronger (#21823)

* Use token

* Avoid failure

* Better error

* Fix

* Fix style

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Yih-Dar
2023-02-28 17:12:44 +01:00
committed by GitHub
parent 6ca844582c
commit aab895c396
10 changed files with 61 additions and 62 deletions

View File

@@ -57,6 +57,7 @@ jobs:
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: runner status check CI_EVENT: runner status check
RUNNER_STATUS: ${{ needs.check_runner_status.result }} RUNNER_STATUS: ${{ needs.check_runner_status.result }}
OFFLINE_RUNNERS: ${{ needs.check_runner_status.outputs.offline_runners }} OFFLINE_RUNNERS: ${{ needs.check_runner_status.outputs.offline_runners }}

View File

@@ -291,6 +291,7 @@ jobs:
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }} CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: nightly-build CI_EVENT: nightly-build
RUNNER_STATUS: ${{ needs.check_runner_status.result }} RUNNER_STATUS: ${{ needs.check_runner_status.result }}
RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}

View File

@@ -254,6 +254,7 @@ jobs:
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }} CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_PAST_FUTURE }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: Past CI - ${{ inputs.framework }}-${{ inputs.version }} CI_EVENT: Past CI - ${{ inputs.framework }}-${{ inputs.version }}
RUNNER_STATUS: ${{ needs.check_runner_status.result }} RUNNER_STATUS: ${{ needs.check_runner_status.result }}
RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}

View File

@@ -568,6 +568,7 @@ jobs:
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: push CI_EVENT: push
CI_TITLE_PUSH: ${{ github.event.head_commit.message }} CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }} CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}

View File

@@ -482,6 +482,7 @@ jobs:
CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_EVENT: scheduled CI_EVENT: scheduled
RUNNER_STATUS: ${{ needs.check_runner_status.result }} RUNNER_STATUS: ${{ needs.check_runner_status.result }}
RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}

View File

@@ -83,19 +83,14 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
# Required parameters # Required parameters
parser.add_argument( parser.add_argument("--workflow_run_id", type=str, required=True, help="A GitHub Actions workflow run id.")
"--workflow_run_id", default=None, type=str, required=True, help="A GitHub Actions workflow run id."
)
parser.add_argument( parser.add_argument(
"--output_dir", "--output_dir",
default=None,
type=str, type=str,
required=True, required=True,
help="Where to store the downloaded artifacts and other result files.", help="Where to store the downloaded artifacts and other result files.",
) )
parser.add_argument( parser.add_argument("--token", default=None, type=str, help="A token that has actions:read permission.")
"--token", default=None, type=str, required=True, help="A token that has actions:read permission."
)
# optional parameters # optional parameters
parser.add_argument( parser.add_argument(
"--targets", "--targets",
@@ -119,7 +114,7 @@ if __name__ == "__main__":
os.makedirs(args.output_dir, exist_ok=True) os.makedirs(args.output_dir, exist_ok=True)
# get download links # get download links
artifacts = get_artifacts_links(args.workflow_run_id) artifacts = get_artifacts_links(args.workflow_run_id, token=args.token)
with open(os.path.join(args.output_dir, "artifacts.json"), "w", encoding="UTF-8") as fp: with open(os.path.join(args.output_dir, "artifacts.json"), "w", encoding="UTF-8") as fp:
json.dump(artifacts, fp, ensure_ascii=False, indent=4) json.dump(artifacts, fp, ensure_ascii=False, indent=4)

View File

@@ -4,17 +4,22 @@ import math
import os import os
import subprocess import subprocess
import time import time
import traceback
import zipfile import zipfile
from collections import Counter from collections import Counter
import requests import requests
def get_job_links(workflow_run_id): def get_job_links(workflow_run_id, token=None):
"""Extract job names and their job links in a GitHub Actions workflow run""" """Extract job names and their job links in a GitHub Actions workflow run"""
headers = None
if token is not None:
headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"}
url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}/jobs?per_page=100" url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}/jobs?per_page=100"
result = requests.get(url).json() result = requests.get(url, headers=headers).json()
job_links = {} job_links = {}
try: try:
@@ -22,21 +27,25 @@ def get_job_links(workflow_run_id):
pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100) pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100)
for i in range(pages_to_iterate_over): for i in range(pages_to_iterate_over):
result = requests.get(url + f"&page={i + 2}").json() result = requests.get(url + f"&page={i + 2}", headers=headers).json()
job_links.update({job["name"]: job["html_url"] for job in result["jobs"]}) job_links.update({job["name"]: job["html_url"] for job in result["jobs"]})
return job_links return job_links
except Exception as e: except Exception:
print("Unknown error, could not fetch links.", e) print(f"Unknown error, could not fetch links:\n{traceback.format_exc()}")
return {} return {}
def get_artifacts_links(worflow_run_id): def get_artifacts_links(worflow_run_id, token=None):
"""Get all artifact links from a workflow run""" """Get all artifact links from a workflow run"""
headers = None
if token is not None:
headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"}
url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{worflow_run_id}/artifacts?per_page=100" url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{worflow_run_id}/artifacts?per_page=100"
result = requests.get(url).json() result = requests.get(url, headers=headers).json()
artifacts = {} artifacts = {}
try: try:
@@ -44,12 +53,12 @@ def get_artifacts_links(worflow_run_id):
pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100) pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100)
for i in range(pages_to_iterate_over): for i in range(pages_to_iterate_over):
result = requests.get(url + f"&page={i + 2}").json() result = requests.get(url + f"&page={i + 2}", headers=headers).json()
artifacts.update({artifact["name"]: artifact["archive_download_url"] for artifact in result["artifacts"]}) artifacts.update({artifact["name"]: artifact["archive_download_url"] for artifact in result["artifacts"]})
return artifacts return artifacts
except Exception as e: except Exception:
print("Unknown error, could not fetch links.", e) print(f"Unknown error, could not fetch links:\n{traceback.format_exc()}")
return {} return {}
@@ -211,24 +220,19 @@ def make_github_table_per_model(reduced_by_model):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
# Required parameters # Required parameters
parser.add_argument( parser.add_argument("--workflow_run_id", type=str, required=True, help="A GitHub Actions workflow run id.")
"--workflow_run_id", default=None, type=str, required=True, help="A GitHub Actions workflow run id."
)
parser.add_argument( parser.add_argument(
"--output_dir", "--output_dir",
default=None,
type=str, type=str,
required=True, required=True,
help="Where to store the downloaded artifacts and other result files.", help="Where to store the downloaded artifacts and other result files.",
) )
parser.add_argument( parser.add_argument("--token", default=None, type=str, help="A token that has actions:read permission.")
"--token", default=None, type=str, required=True, help="A token that has actions:read permission."
)
args = parser.parse_args() args = parser.parse_args()
os.makedirs(args.output_dir, exist_ok=True) os.makedirs(args.output_dir, exist_ok=True)
_job_links = get_job_links(args.workflow_run_id) _job_links = get_job_links(args.workflow_run_id, token=args.token)
job_links = {} job_links = {}
# To deal with `workflow_call` event, where a job name is the combination of the job names in the caller and callee. # To deal with `workflow_call` event, where a job name is the combination of the job names in the caller and callee.
# For example, `PyTorch 1.11 / Model tests (models/albert, single-gpu)`. # For example, `PyTorch 1.11 / Model tests (models/albert, single-gpu)`.
@@ -242,7 +246,7 @@ if __name__ == "__main__":
with open(os.path.join(args.output_dir, "job_links.json"), "w", encoding="UTF-8") as fp: with open(os.path.join(args.output_dir, "job_links.json"), "w", encoding="UTF-8") as fp:
json.dump(job_links, fp, ensure_ascii=False, indent=4) json.dump(job_links, fp, ensure_ascii=False, indent=4)
artifacts = get_artifacts_links(args.workflow_run_id) artifacts = get_artifacts_links(args.workflow_run_id, token=args.token)
with open(os.path.join(args.output_dir, "artifacts.json"), "w", encoding="UTF-8") as fp: with open(os.path.join(args.output_dir, "artifacts.json"), "w", encoding="UTF-8") as fp:
json.dump(artifacts, fp, ensure_ascii=False, indent=4) json.dump(artifacts, fp, ensure_ascii=False, indent=4)

View File

@@ -1,5 +1,6 @@
import argparse import argparse
import math import math
import traceback
import dateutil.parser as date_parser import dateutil.parser as date_parser
import requests import requests
@@ -25,11 +26,15 @@ def extract_time_from_single_job(job):
return job_info return job_info
def get_job_time(workflow_run_id): def get_job_time(workflow_run_id, token=None):
"""Extract time info for all jobs in a GitHub Actions workflow run""" """Extract time info for all jobs in a GitHub Actions workflow run"""
headers = None
if token is not None:
headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"}
url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}/jobs?per_page=100" url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}/jobs?per_page=100"
result = requests.get(url).json() result = requests.get(url, headers=headers).json()
job_time = {} job_time = {}
try: try:
@@ -37,12 +42,12 @@ def get_job_time(workflow_run_id):
pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100) pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100)
for i in range(pages_to_iterate_over): for i in range(pages_to_iterate_over):
result = requests.get(url + f"&page={i + 2}").json() result = requests.get(url + f"&page={i + 2}", headers=headers).json()
job_time.update({job["name"]: extract_time_from_single_job(job) for job in result["jobs"]}) job_time.update({job["name"]: extract_time_from_single_job(job) for job in result["jobs"]})
return job_time return job_time
except Exception as e: except Exception:
print("Unknown error, could not fetch links.", e) print(f"Unknown error, could not fetch links:\n{traceback.format_exc()}")
return {} return {}
@@ -56,9 +61,7 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
# Required parameters # Required parameters
parser.add_argument( parser.add_argument("--workflow_run_id", type=str, required=True, help="A GitHub Actions workflow run id.")
"--workflow_run_id", default=None, type=str, required=True, help="A GitHub Actions workflow run id."
)
args = parser.parse_args() args = parser.parse_args()
job_time = get_job_time(args.workflow_run_id) job_time = get_job_time(args.workflow_run_id)

View File

@@ -16,7 +16,6 @@ import ast
import collections import collections
import functools import functools
import json import json
import math
import operator import operator
import os import os
import re import re
@@ -25,6 +24,7 @@ import time
from typing import Dict, List, Optional, Union from typing import Dict, List, Optional, Union
import requests import requests
from get_ci_error_statistics import get_job_links
from slack_sdk import WebClient from slack_sdk import WebClient
@@ -206,6 +206,15 @@ class Message:
@property @property
def warnings(self) -> Dict: def warnings(self) -> Dict:
# If something goes wrong, let's avoid the CI report failing to be sent.
button_text = "Check warnings (Link not found)"
# Use the workflow run link
job_link = f"https://github.com/huggingface/transformers/actions/runs/{os.environ['GITHUB_RUN_ID']}"
if "Extract warnings in CI artifacts" in github_actions_job_links:
button_text = "Check warnings"
# Use the actual job link
job_link = f"{github_actions_job_links['Extract warnings in CI artifacts']}"
return { return {
"type": "section", "type": "section",
"text": { "text": {
@@ -215,8 +224,8 @@ class Message:
}, },
"accessory": { "accessory": {
"type": "button", "type": "button",
"text": {"type": "plain_text", "text": "Check warnings", "emoji": True}, "text": {"type": "plain_text", "text": button_text, "emoji": True},
"url": f"{github_actions_job_links['Extract warnings in CI artifacts']}", "url": job_link,
}, },
} }
@@ -577,27 +586,6 @@ class Message:
time.sleep(1) time.sleep(1)
def get_job_links():
run_id = os.environ["GITHUB_RUN_ID"]
url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{run_id}/jobs?per_page=100"
result = requests.get(url).json()
jobs = {}
try:
jobs.update({job["name"]: job["html_url"] for job in result["jobs"]})
pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100)
for i in range(pages_to_iterate_over):
result = requests.get(url + f"&page={i + 2}").json()
jobs.update({job["name"]: job["html_url"] for job in result["jobs"]})
return jobs
except Exception as e:
print("Unknown error, could not fetch links.", e)
return {}
def retrieve_artifact(name: str, gpu: Optional[str]): def retrieve_artifact(name: str, gpu: Optional[str]):
if gpu not in [None, "single", "multi"]: if gpu not in [None, "single", "multi"]:
raise ValueError(f"Invalid GPU for artifact. Passed GPU: `{gpu}`.") raise ValueError(f"Invalid GPU for artifact. Passed GPU: `{gpu}`.")
@@ -770,7 +758,9 @@ if __name__ == "__main__":
Message.error_out(title, ci_title) Message.error_out(title, ci_title)
raise ValueError("Errored out.") raise ValueError("Errored out.")
github_actions_job_links = get_job_links() github_actions_job_links = get_job_links(
workflow_run_id=os.environ["GITHUB_RUN_ID"], token=os.environ["ACCESS_REPO_INFO_TOKEN"]
)
available_artifacts = retrieve_available_artifacts() available_artifacts = retrieve_available_artifacts()
modeling_categories = [ modeling_categories = [

View File

@@ -136,8 +136,10 @@ past_versions_testing = {
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser("Choose the framework and version to install") parser = argparse.ArgumentParser("Choose the framework and version to install")
parser.add_argument("--framework", help="The framework to install. Should be `torch` or `tensorflow`", type=str) parser.add_argument(
parser.add_argument("--version", help="The version of the framework to install.", type=str) "--framework", help="The framework to install. Should be `torch` or `tensorflow`", type=str, required=True
)
parser.add_argument("--version", help="The version of the framework to install.", type=str, required=True)
args = parser.parse_args() args = parser.parse_args()
info = past_versions_testing[args.framework][args.version] info = past_versions_testing[args.framework][args.version]