From 488d2a1c30d91e73e16ac341707b1e7c1b651ff4 Mon Sep 17 00:00:00 2001 From: Andrey Devyatkin Date: Tue, 26 Mar 2024 21:21:40 +0400 Subject: [PATCH] Fix the gap between flaky_test_detection and workflow_prefetcher scripts (#30739) * fix the gap between flaky_test_detection and workflow_prefetcher scripts * fix the gap between flaky_test_detection and workflow_prefetcher scripts --- .../provisioning/alerting/flaky_test.yaml | 2 +- .../github_runs_prefetcher/code/main.py | 26 ++++++++++++------- .test-infra/tools/flaky_test_detection.py | 8 +++++- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/.test-infra/metrics/grafana/provisioning/alerting/flaky_test.yaml b/.test-infra/metrics/grafana/provisioning/alerting/flaky_test.yaml index 4a04ddb57490..005239cca2ce 100644 --- a/.test-infra/metrics/grafana/provisioning/alerting/flaky_test.yaml +++ b/.test-infra/metrics/grafana/provisioning/alerting/flaky_test.yaml @@ -36,7 +36,7 @@ groups: maxDataPoints: 43200 rawQuery: true rawSql: |- - SELECT COUNT(workflow_id), CAST(workflow_id AS TEXT), name AS workflow_name, filename AS workflow_filename, url AS workflow_url, CAST(threshold AS TEXT) AS workflow_threshold + SELECT COUNT(workflow_id), CAST(workflow_id AS TEXT), name AS workflow_name, filename AS workflow_filename, url AS workflow_url, CAST(threshold AS TEXT) AS workflow_threshold, CAST(retrieved_at AS TEXT) AS workflow_retrieved_at FROM github_workflows WHERE is_flaky = true GROUP BY workflow_id diff --git a/.test-infra/metrics/sync/github/github_runs_prefetcher/code/main.py b/.test-infra/metrics/sync/github/github_runs_prefetcher/code/main.py index ddb2120ab95f..c2fe093b657e 100644 --- a/.test-infra/metrics/sync/github/github_runs_prefetcher/code/main.py +++ b/.test-infra/metrics/sync/github/github_runs_prefetcher/code/main.py @@ -269,6 +269,7 @@ async def fetch(url, semaphore, params=None, headers=None, request_id=None): async def fetch_workflow_runs(): def append_workflow_runs(workflow, runs): + workflow_runs = {} for run in runs: # Getting rid of all runs with a "skipped" status to display # only actual runs @@ -278,15 +279,17 @@ def append_workflow_runs(workflow, runs): status = run["conclusion"] elif run["status"] != "cancelled": status = run["status"] - workflow.runs.append( - WorkflowRun( - run["id"], - status, - run["html_url"], - workflow.id, - datetime.strptime(run["run_started_at"], "%Y-%m-%dT%H:%M:%SZ"), - ) + workflow_run = WorkflowRun( + run["id"], + status, + run["html_url"], + workflow.id, + datetime.strptime(run["run_started_at"], "%Y-%m-%dT%H:%M:%SZ"), ) + if workflow_runs.get(workflow_run.id): + print(f"Duplicate run for {workflow.id} workflow: {workflow_run.id}") + workflow_runs[workflow_run.id] = workflow_run + workflow.runs.extend(workflow_runs.values()) url = f"https://api.github.com/repos/{GIT_ORG}/beam/actions/workflows" headers = {"Authorization": get_token()} @@ -428,7 +431,8 @@ def save_workflows(workflows): url text NOT NULL, dashboard_category text NOT NULL, threshold real NOT NULL, - is_flaky boolean NOT NULL)\n""" + is_flaky boolean NOT NULL, + retrieved_at timestamp with time zone NOT NULL)\n""" create_workflow_runs_table_query = f""" CREATE TABLE IF NOT EXISTS {workflow_runs_table_name} ( run_id text NOT NULL PRIMARY KEY, @@ -441,13 +445,14 @@ def save_workflows(workflows): cursor.execute(create_workflows_table_query) cursor.execute(create_workflow_runs_table_query) insert_workflows_query = f""" - INSERT INTO {workflows_table_name} (workflow_id, name, filename, url, dashboard_category, threshold, is_flaky) + INSERT INTO {workflows_table_name} (workflow_id, name, filename, url, dashboard_category, threshold, is_flaky, retrieved_at) VALUES %s""" insert_workflow_runs_query = f""" INSERT INTO {workflow_runs_table_name} (run_id, run_number, status, url, workflow_id, started_at) VALUES %s""" insert_workflows = [] insert_workflow_runs = [] + current_date = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") for workflow in workflows: insert_workflows.append( ( @@ -458,6 +463,7 @@ def save_workflows(workflows): workflow.category, workflow.threshold, workflow.is_flaky, + current_date, ) ) for idx, run in enumerate(workflow.runs): diff --git a/.test-infra/tools/flaky_test_detection.py b/.test-infra/tools/flaky_test_detection.py index 87a2fb83bb2e..768900db94cb 100644 --- a/.test-infra/tools/flaky_test_detection.py +++ b/.test-infra/tools/flaky_test_detection.py @@ -16,6 +16,7 @@ import os import re import requests +from datetime import datetime from github import Github from github import Auth @@ -34,12 +35,14 @@ def __init__( workflow_name, workflow_filename, workflow_threshold, + workflow_retrieved_at, ): self.workflow_id = workflow_id self.workflow_url = workflow_url self.workflow_name = workflow_name self.workflow_filename = workflow_filename self.workflow_threshold = round(float(workflow_threshold), 2) + self.workflow_retrieved_at = workflow_retrieved_at def get_workflow_issues(issues): @@ -89,6 +92,7 @@ def get_grafana_alerts(): alert["labels"]["workflow_name"], alert["labels"]["workflow_filename"], alert["labels"]["workflow_threshold"], + datetime.fromisoformat(alert["labels"]["workflow_retrieved_at"]), ) ) return alerts @@ -114,6 +118,8 @@ def main(): issue = workflow_closed_issues[alert.workflow_id] if READ_ONLY == "true": print("READ_ONLY is true, not reopening issue") + elif issue.closed_at > alert.workflow_retrieved_at: + print(f"The issue for the workflow {alert.workflow_id} has been closed, skipping") else: issue.edit(state="open") issue.create_comment(body="Reopening since the workflow is still flaky") @@ -121,7 +127,7 @@ def main(): elif alert.workflow_id not in workflow_open_issues.keys(): create_github_issue(repo, alert) else: - print("Issue is already open, skipping") + print(f"The issue for the workflow {alert.workflow_id} is already open, skipping") g.close()