feat: Collect output from all attempts of workflows; full JSON output
This is a reworking of `get_action_errors.py` with the following goals:

- Retrieve all attempts of a workflow, not just the last one. This is important for collecting transient errors, since people will usually re-run their workflows when there is a failure unrelated to their code.
- Tolerate interruptions of the script due to timeouts, rate-limiting, and networking issues. On re-run, workflow runs that have been fully fetched are skipped.
- Record the full JSON for workflow attempts, checks, and annotations so that we can do more in-depth queries if needed.

The script no longer produces a CSV, but it's still straightforward to query how often we're seeing a particular error, as sketched below.
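To illustrate that last point, here is a minimal sketch (not part of this commit) of the kind of query the new output layout supports. It assumes the directory layout described in `run`'s docstring, that each `annotations_*.json` file holds a list of annotation objects with a `message` field (as the old CSV script read them), and a hypothetical output directory named `actions-data`; the helper name `count_annotation_messages` is likewise made up.

```python
import json
from collections import Counter
from pathlib import Path


def count_annotation_messages(output_dir):
    """Tally annotation messages across every downloaded attempt."""
    counts = Counter()
    # Layout: {owner}/{repo}/run_{run_id}/attempt_{n}/annotations_{check_run_id}.json
    for annotations_file in Path(output_dir).glob('*/*/run_*/attempt_*/annotations_*.json'):
        for annotation in json.loads(annotations_file.read_text()):
            counts[annotation['message']] += 1
    return counts


if __name__ == '__main__':
    for message, count in count_annotation_messages('actions-data').most_common(20):
        print(f"{count:6d}  {message}")
```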
Showing 3 changed files with 248 additions and 118 deletions.
get_action_errors.py
@@ -1,138 +1,262 @@
 """
-Script to get the annotations from all failed checks in edx-platform after a given date
-
-Gets all the commits to master after the date, then for each commit gets each check suite, then for each failed check
-suite gets each run. Collects the annotations for all the failed runs. The annotations will sometimes contain useful
-error messages, sometimes just the exit code. Getting the full logs requires admin permissions to edx-platform so it's
-not included in this script.
-
-Example output row:
-commit_date,run_started_at,run_completed_at,commit_hash,name,message
-2023-07-26T20:59:01Z,2023-07-27T06:56:23Z,2023-07-27T07:01:58Z,06e738e64a3485ecec037a9b8a36cf4ae145ea8a,
-upgrade-one-python-dependency-workflow,Process completed with exit code 2.
-
-This script takes a pretty long time to run (15m for 2 months) and there is a risk if you look too far back you will hit
-your API limit.
+Script to get the annotations from all checks in a repo in a given date range.
+
+Run with --help (or see docstring of `run`) for more details.
 """

-from csv import DictWriter
+import json
+import os
+import time
 from datetime import datetime
+from os import path

 import click
 import requests


-@click.command()
-@click.option('--token', envvar='GITHUB_TOKEN')
-@click.option('--start_date', type=click.DateTime(formats=["%Y-%m-%d"]), help="Date of earliest commit")
-@click.option('--filename', help="Where to write the data")
-def get_errors_from_date(token, start_date, filename):
-    """
-    Creates a csv documenting the annotations from all failed runs for commits to edx-platform after the given date
-
-    Parameters:
-        token (string): The GitHub API token. Retrieved from the env GITHUB_TOKEN variable
-        start_date (date): The earliest date to look for
-        filename (string): Where to write the csv
-    """
-    headers = {'Authorization': f"Bearer {token}"}
-    all_commits_after_date = get_commits_after_date(start_date, headers=headers)
-    all_check_suites = []
-    all_rows = []
-    for commit in all_commits_after_date:
-        # gather all the check suite data from each commit into a single list
-        add_commit_check_suites(commit, all_check_suites, headers)
-    for check_suite in all_check_suites:
-        # only record annotations for failed runs
-        if check_suite['conclusion'] == 'failure':
-            check_runs = requests.get(check_suite['check_runs_url'], headers=headers).json()
-            for run in check_runs['check_runs']:
-                if run['conclusion'] == 'failure' and run['output']['annotations_count'] > 0:
-                    annotations = requests.get(run['output']['annotations_url'], headers=headers).json()
-                    for annotation in annotations:
-                        all_rows.append({
-                            'commit_hash': run['head_sha'],
-                            'name': run['name'],
-                            'message': annotation['message'],
-                            'run_started_at': run['started_at'],
-                            'run_completed_at': run['completed_at'],
-                            'commit_date': check_suite['commit_date']
-                        })
-
-    with open(filename, 'w') as f:
-        writes = DictWriter(f, fieldnames=['commit_date', 'run_started_at', 'run_completed_at', 'commit_hash', 'name',
-                                           'message'])
-        writes.writeheader()
-        writes.writerows(all_rows)
-
-
-def get_commits_after_date(cut_off_date, headers):
-    """
-    Get API data for all commits to edx-platform/master after the given date
-
-    Parameters:
-        cut_off_date (date): Earliest date to look
-        headers (dict): Authentication headers for GH requests
-
-    Returns:
-        A list of all the API responses for each commit after the date
-    """
-    base_url = "https://api.github.com/repos/openedx/edx-platform/commits?sha=master&per_page=100"
-    # will keep track of whether we've hit our start_date. the API automatically returns commits ordered
-    # by date, descending
-    found_last = False
-    all_commits_after_date = []
-    page = 1
-    while not found_last:
-        page_url = f"{base_url}&page={page}"
-        print(f"Fetching page {page_url}")
-        response = requests.get(page_url, headers=headers)
-        if response.status_code >= 400:
-            print(response)
-            break
-        response_json = response.json()
-        if len(response_json) == 0:
-            break
-        for single_commit in response_json:
-            # if present, take off the "Z" at the end of the date to make it proper ISO format
-            commit_date = datetime.fromisoformat(single_commit['commit']['committer']['date'].replace("Z", ""))
-            if commit_date < cut_off_date:
-                found_last = True
-                break
-            all_commits_after_date.append(single_commit)
-        page += 1
-    return all_commits_after_date
-
-
-def add_commit_check_suites(current_commit, current_suites, headers):
-    """
-    Add API information from all check suites performed for a given commit to the given list
-
-    Parameters:
-        current_commit (str): the SHA of the commit to check
-        current_suites (list): list to be extended
-        headers (dict): Authentication headers for connecting to GitHub
-    """
-    sha = current_commit['sha']
-    check_url = f"https://api.github.com/repos/openedx/edx-platform/commits/{sha}/check-suites?per_page=100"
-    page = 1
-    while True:
-        # Keep going until we get an empty check_suites list or an error. An empty list means we've hit the last page.
-        paginated_url = f"{check_url}&page={page}"
-        print(f"Fetching page {paginated_url}")
-        response = requests.get(paginated_url, headers=headers).json()
-        if 'check_suites' not in response.keys():
-            print(response)
-            break
-        check_suites = response['check_suites']
-        if len(check_suites) == 0:
-            break
-        # silly line to pass the date of the commit along to eventually write in the spreadsheet
-        current_suites.extend([{**s, 'commit_date': current_commit['commit']['committer']['date']}
-                               for s in check_suites])
-        page += 1
+def _ensure_dir(base, *more):
+    """
+    Join these path segments, create as dir if not already, and return the path.
+    """
+    subdir = path.join(base, *more)
+    os.makedirs(subdir, exist_ok=True)
+    return subdir
+
+
+class ActionsDownloader:
+
+    def __init__(self, *, output_dir, token):
+        self.output_dir = output_dir
+        self.api_headers = {
+            'Accept': 'application/vnd.github+json',
+            'Authorization': f"Bearer {token}",
+            'X-GitHub-Api-Version': '2022-11-28',
+        }
+
+        # Unix epoch timestamp in seconds when GitHub will allow us to
+        # resume making requests (or None if not rate-limited)
+        self.github_sleep_until_s = None
+
+        # The actual contents don't really matter, but empty files might
+        # be confusing and maybe the fetch date will be useful.
+        self.download_marker_data = {
+            'fetch_run_timestamp': datetime.utcnow().isoformat(),
+        }
+        self.workflow_fetch_params = {
+            # We don't want the response to include all the PRs that include this commit.
+            # It can be large for active repos and it doesn't help us.
+            'exclude_pull_requests': 'true',
+        }
+
+    def _github_get(self, url, *, params=None):
+        """
+        GET the url with GitHub auth token and return a `requests` response.
+
+        Performs both proactive backoff based on response header hints and
+        reactive backoff based on error responses.
+
+        Docs:
+        - https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api?apiVersion=2022-11-28
+        - https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28
+        """
+        backoff_s = None  # exponential backoff in seconds, or None if not in effect
+        while True:
+            # If GitHub has told us how long to wait, use that instead of the
+            # current exponential backoff value.
+            if self.github_sleep_until_s is not None:
+                # Add slop to prevent tight loop on expiry
+                sleep_s = self.github_sleep_until_s + 5.0 - time.time()
+            else:
+                sleep_s = backoff_s
+
+            if sleep_s and sleep_s > 0:
+                time.sleep(sleep_s)
+
+            response = requests.get(url, params=params, headers=self.api_headers, timeout=20.0)
+
+            # Update rate-limiting data for next call
+            if out_of_requests := (response.headers.get('x-ratelimit-remaining') == '0'):
+                self.github_sleep_until_s = int(response.headers.get('x-ratelimit-reset'))
+                print(
+                    "Reached rate limit. "
+                    f"Will wait {int(self.github_sleep_until_s - time.time())} seconds "
+                    "before next request."
+                )
+            else:
+                self.github_sleep_until_s = None
+                backoff_s = None
+
+            if response.status_code == 200:
+                # We're good to go!
+                return response
+            elif out_of_requests:
+                # We got an error and have been informed we're out of requests.
+                # (Should be a 429 or 403 according to GitHub's docs.) We'll try
+                # again.
+                continue
+            elif response.status_code == 429:
+                # It's possible that GitHub might give us a 429 without the
+                # expected rate-limiting headers.
+                print("Rate-limited without timing hint; performing exponential backoff.")
+                backoff_s = 2 * backoff_s if backoff_s else 4
+                continue
+            else:
+                # Generic error case
+                response.raise_for_status()
+
+    def _write_json(self, data, *path_parts):
+        """
+        Write data as pretty-printed JSON to the given path (joining as needed).
+        """
+        with open(path.join(*path_parts), 'w') as f:
+            json.dump(data, f, sort_keys=True, indent=2)
+
+    def _log_attempt(self, attempt, workflow_dir):
+        """
+        Log this attempt and all of its checks and annotations.
+        """
+        attempt_dir = _ensure_dir(workflow_dir, f"attempt_{attempt['run_attempt']}")
+
+        attempt_file = path.join(attempt_dir, "attempt.json")
+        if path.isfile(attempt_file):
+            print("Attempt already fully downloaded; skipping.")
+            return
+
+        # Get the checks associated with this workflow run -- this
+        # includes output title, summary, and text.
+        for check_run in self._github_get(attempt['check_suite_url'] + '/check-runs').json()['check_runs']:
+            self._write_json(check_run, attempt_dir, f"check_run_{check_run['id']}.json")
+
+            annotations = self._github_get(check_run['output']['annotations_url']).json()
+            self._write_json(annotations, attempt_dir, f"annotations_{check_run['id']}.json")
+
+        # Do this last, indicating that the attempt was completely
+        # downloaded. This allows us to skip it next time.
+        self._write_json(attempt, attempt_file)
+
+    def _list_all_attempts(self, run):
+        """
+        Yield all attempts of the given workflow run, including the given one.
+        """
+        yield run
+        while next_url := run.get('previous_attempt_url'):
+            resp = self._github_get(next_url, params=self.workflow_fetch_params)
+            run = resp.json()
+            yield run
+
+    def _download_workflow_run(self, run, repo_dir):
+        print(f"Downloading workflow run id={run['id']}")
+
+        workflow_dir = _ensure_dir(repo_dir, f"run_{run['id']}")
+
+        download_marker = path.join(workflow_dir, "download-marker.json")
+        if path.isfile(download_marker):
+            print("Workflow already fully downloaded; skipping.")
+            return
+
+        # We're getting the *most recent attempt* of a run. Spool
+        # out the whole list of attempts and write them out.
+        for attempt in self._list_all_attempts(run):
+            self._log_attempt(attempt, workflow_dir)
+
+        # Once all attempts have been logged, write out a marker file
+        # that indicates this workflow has been completely downloaded
+        # and can be skipped in the future.
+        self._write_json(self.download_marker_data, download_marker)
+
+    def _list_completed_runs(self, owner, repo, start_date, end_date):
+        """
+        Yield all completed workflow runs.
+        """
+        # https://docs.github.com/en/rest/actions/workflow-runs?apiVersion=2022-11-28#list-workflow-runs-for-a-repository
+        runs_url = f"https://api.github.com/repos/{owner}/{repo}/actions/runs"
+        while runs_url is not None:
+            params = {
+                **self.workflow_fetch_params,
+                # Filter on status=completed to get all workflows that have finished
+                # running. The API docs allow you to use a status *or* a conclusion
+                # here, but don't explain their relationship. The check run API docs
+                # seem to cover basically the same values, though:
+                # https://docs.github.com/en/rest/guides/using-the-rest-api-to-interact-with-checks?apiVersion=2022-11-28#about-check-runs
+                'status': 'completed',
+                # https://docs.github.com/en/search-github/getting-started-with-searching-on-github/understanding-the-search-syntax#query-for-dates
+                'created': f'{start_date}..{end_date}',
+                'per_page': 100,
+            }
+            print(f"Requesting {runs_url} with {params=!r}")
+            resp = self._github_get(runs_url, params=params)
+            yield from resp.json()['workflow_runs']
+            runs_url = resp.links.get('next', {}).get('url')
+
+    def download(self, owner, repo, start_date, end_date):
+        repo_dir = _ensure_dir(self.output_dir, owner, repo)
+
+        for workflow_run in self._list_completed_runs(owner, repo, start_date, end_date):
+            self._download_workflow_run(workflow_run, repo_dir)
+
+
+@click.command()
+@click.option(
+    '--token', envvar='GITHUB_TOKEN',
+    required=True,
+    help="A GitHub access token that has access to the repository.",
+)
+@click.option(
+    '--output-dir', type=click.Path(file_okay=False, dir_okay=True, writable=True),
+    required=True,
+    help="A directory (or path where one can be created) where the output will be written.",
+)
+@click.option(
+    '--owner', type=str, required=True,
+    help="Owning user or organization of the repo, e.g. openedx.",
+)
+@click.option(
+    '--repo', type=str, required=True,
+    help="Repo shortname, e.g. edx-platform.",
+)
+@click.option(
+    '--start-date', type=click.DateTime(formats=["%Y-%m-%d"]), required=True,
+    help="Only fetch workflow runs starting from this date.",
+)
+@click.option(
+    '--end-date', type=click.DateTime(formats=["%Y-%m-%d"]), required=True,
+    help="Only fetch workflow runs up through this date.",
+)
+def run(*, token, output_dir, owner, repo, start_date, end_date):
+    """
+    Fetch information about workflows and check outcomes for a repository
+    within some date range, writing the output to a directory.
+
+    This script will fetch:
+
+    \b
+    - Workflow runs, including prior attempts
+    - Check runs associated with the attempts
+    - Annotations produced by the checks
+
+    The output directory will contain subdirectories of the form
+    `{OWNER}/{REPO}/run_#/attempt_#/` for each attempt of a workflow run.
+    (run_# is numbered by workflow run ID, and attempt_# by attempt index.)
+
+    The run_# directory will also contain a `download-marker.json` which
+    indicates that all information about the workflow run was successfully
+    downloaded. (If missing, this indicates a partial download.)
+
+    Each attempt directory will contain:
+
+    \b
+    - attempt.json: Information about the workflow run. The documents for each
+      attempt of a workflow run will be largely the same.
+    - check_run_#.json: One of the checks associated with the attempt, numbered
+      by check-run ID.
+    - annotations_#.json: Annotations associated with the check run of that ID.
+    """
+    dl = ActionsDownloader(output_dir=output_dir, token=token)
+    dl.download(
+        owner, repo,
+        # We just want the Y-m-d part
+        start_date.date().isoformat(), end_date.date().isoformat(),
+    )


 if __name__ == '__main__':
-    get_errors_from_date()
+    run()
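For reference, a hedged sketch of driving the downloader programmatically rather than through the CLI. The module name `get_action_errors`, the output directory, and the date range are assumptions; the class, its constructor arguments, and the `download` signature come from the diff above.

```python
import os

# Assumes the script is importable as a module named `get_action_errors`.
from get_action_errors import ActionsDownloader

dl = ActionsDownloader(
    output_dir='actions-data',         # hypothetical output directory
    token=os.environ['GITHUB_TOKEN'],  # same env var the CLI option reads
)
# `download` takes ISO Y-m-d strings, matching what `run` passes it.
dl.download('openedx', 'edx-platform', '2023-07-01', '2023-09-01')
```

The CLI equivalent uses the options documented in the diff: `--token`, `--output-dir`, `--owner`, `--repo`, `--start-date`, and `--end-date`.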