From 1640f3747fb0d8ee1cb4442faafc427720f500f1 Mon Sep 17 00:00:00 2001
From: Devin Smith
Date: Fri, 23 Feb 2024 10:04:11 -0800
Subject: [PATCH] Remove old benchmark script (#5187)

---
 .github/tools/README.md                       |  41 ---
 .github/tools/deephaven-gh-runner-init.sh     |  43 ---
 .github/tools/metrics/benchmark_capture.py    | 308 ------------------
 .github/tools/metrics/requirements.txt        |   5 -
 .../templates/benchmark_dashboard.json.j2     |  43 ---
 5 files changed, 440 deletions(-)
 delete mode 100644 .github/tools/README.md
 delete mode 100644 .github/tools/deephaven-gh-runner-init.sh
 delete mode 100644 .github/tools/metrics/benchmark_capture.py
 delete mode 100644 .github/tools/metrics/requirements.txt
 delete mode 100644 .github/tools/metrics/templates/benchmark_dashboard.json.j2

diff --git a/.github/tools/README.md b/.github/tools/README.md
deleted file mode 100644
index 0852673dd08..00000000000
--- a/.github/tools/README.md
+++ /dev/null
@@ -1,41 +0,0 @@
-# GitHub Workflow Tools
-
-## Nightly Benchmark Metrics and Dashboarding
-
-Location: `.github/tools/metrics/benchmark_capture.py`
-
-The benchmark capture tool runs after the nightly benchmark job to capture result data and publish it to GCP Monitoring for historical reference. The script consumes the CSV files that the benchmark Gradle jobs publish to the `tmp/logs/` folder at the root of the project during the `Nightly Benchmark` workflow execution.
-
-### Benchmark File Format
-Due to constraints of the GCP API, we are limited to 10 labels per metric. To accommodate this, only the first 9 additional fields are processed as labels; the `Benchmark` field is skipped because it is used as the metric's base name.
-
-#### Baseline Fields
-- Benchmark: Name of the benchmark, used as the base of the metric name.
-- Score: Value used as the metric point.
-- Run & Iteration: Not used in the dashboarding; the mean of each metric combination is rendered as a single data point.
-
-#### Dynamic Fields
-Any fields beyond "Benchmark", "Score", "Run", and "Iteration" are added as labels to allow filtering and aggregation of the various permutations across all benchmark results.
-
-### Dashboard Creation
-
-For each CSV file processed by the tooling, a dashboard is generated to render the metrics. Each dashboard is limited to rendering a total of 40 metrics; this value can be customized from the command line, but the default was chosen to match GCP's upper limit on metrics per dashboard.
-
-The dashboard title is generated from the file's basename and suffixed with the dashboard index; in the event there are more than 40 metrics in total, multiple dashboards are created to accommodate them.
-
-### GitHub Actions Integration
-
-Location: `.github/workflows/nightly-benchmarks.yml`
-
-The `nightly-benchmarks.yml` file includes a `benchmarks` section which can be extended to expand the scope of benchmarks run and published. For each benchmark step defined, the CSV files are consumed by the later publish-metrics step and archived to GitHub artifacts by the archive step.
-
-The following template can be used to create additional benchmark steps within the `benchmark` job definition.
-
-```
-- name: Benchmark - [BENCHMARK_NAME]
-  uses: burrunan/gradle-cache-action@v1
-  with:
-    job-id: benchmark
-    arguments: [GRADLE_COMMAND]
-    gradle-version: wrapper
-```
\ No newline at end of file
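For illustration, a minimal Python sketch of the field-to-metric mapping the removed README describes: `Benchmark` becomes the custom metric name, `Score` the point value, and the remaining columns become labels under GCP's 10-label cap. The CSV row and its `Mode`/`Threads` columns are hypothetical.

```python
import csv
import io

GCP_LABEL_LIMIT = 10
BASELINE_FIELDS = {"Benchmark", "Score", "Run", "Iteration"}

# Hypothetical benchmark CSV row; "Mode" and "Threads" stand in for dynamic fields.
sample_csv = io.StringIO(
    "Benchmark,Score,Run,Iteration,Mode,Threads\n"
    "io.deephaven.SomeBenchmark.read,123.4,1,1,parquet,4\n"
)

for row in csv.DictReader(sample_csv):
    metric_type = f"custom.googleapis.com/{row['Benchmark']}"  # metric base name
    value = float(row["Score"])                                # metric point value
    # Dynamic fields become labels; Run/Iteration are attached separately by the
    # capture script, so leave room for them under the 10-label cap.
    labels = {k: v for k, v in row.items() if k not in BASELINE_FIELDS}
    labels = dict(list(labels.items())[: GCP_LABEL_LIMIT - 2])
    print(metric_type, value, labels)
```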
diff --git a/.github/tools/deephaven-gh-runner-init.sh b/.github/tools/deephaven-gh-runner-init.sh
deleted file mode 100644
index 8fe38c88001..00000000000
--- a/.github/tools/deephaven-gh-runner-init.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-
-set -e
-
-########################################
-## GitHub Runner Installation
-## Deephaven
-########################################
-
-GH_RUNNER_VERSION=${GH_RUNNER_VERSION:="2.277.1"}
-GH_RUNNER_REPO_URL=${GH_RUNNER_REPO_URL:="https://github.com/deephaven/deephaven-core"}
-GH_RUNNER_TOKEN=${GH_RUNNER_TOKEN:?""}
-GH_RUNNER_NAME=${GH_RUNNER_NAME:=$HOSTNAME}
-GH_RUNNER_LABELS=${GH_RUNNER_LABELS:="gce-runner,benchmark"}
-GH_RUNNER_ROOT=${GH_RUNNER_ROOT:="/github"}
-GH_RUNNER_TMP=${GH_RUNNER_TMP:="/github-tmp"}
-
-## Prepare Apt system packages
-apt update
-apt install --yes curl unzip docker.io
-
-## JDK Build Prereq
-apt install --yes openjdk-8-jdk-headless
-
-## Setup runner directories
-mkdir -p $GH_RUNNER_ROOT
-mkdir -p $GH_RUNNER_TMP
-
-## Pull runner binary
-curl -o actions.tar.gz --location "https://github.com/actions/runner/releases/download/v${GH_RUNNER_VERSION}/actions-runner-linux-x64-${GH_RUNNER_VERSION}.tar.gz"
-tar xzf actions.tar.gz --directory $GH_RUNNER_ROOT
-rm -f actions.tar.gz
-
-## Install dependencies
-/github/bin/installdependencies.sh
-
-## Configure runner
-RUNNER_ALLOW_RUNASROOT=1 $GH_RUNNER_ROOT/config.sh --unattended --replace --work $GH_RUNNER_TMP --url "$GH_RUNNER_REPO_URL" --token "$GH_RUNNER_TOKEN" --labels $GH_RUNNER_LABELS
-
-## Configure runner as service
-cd $GH_RUNNER_ROOT || exit
-./svc.sh install
-./svc.sh start
\ No newline at end of file
diff --git a/.github/tools/metrics/benchmark_capture.py b/.github/tools/metrics/benchmark_capture.py
deleted file mode 100644
index 940fa25f5d2..00000000000
--- a/.github/tools/metrics/benchmark_capture.py
+++ /dev/null
@@ -1,308 +0,0 @@
-#!/usr/bin/env python3
-
-import time
-import click
-import os
-import csv
-import requests
-import json
-import logging
-
-from pprint import pprint
-from datetime import timedelta
-
-from jinja2 import Environment, FileSystemLoader
-
-from google.api_core.exceptions import NotFound, InternalServerError
-
-from google.cloud import monitoring_v3
-from google.cloud import monitoring_dashboard_v1
-
-# Label limit of 10 is a hard API limit
-GCP_LABEL_LIMIT = 10
-
-# Dashboard limits, 10 is an organic floor, 40 is a hard API limit
-DASHBOARD_METRIC_FLOOR = 10
-DASHBOARD_METRIC_CEILING = 40
-
-# Setup logger
-logger = logging.getLogger("benchmark_capture")
-logger.setLevel(logging.INFO)
-
-ch = logging.StreamHandler()
-ch.setLevel(logging.INFO)
-
-logger.addHandler(ch)
-
-# Capture time for metrics
-now = time.time()
-
-# Setup GCP clients
-monitoring_client = monitoring_v3.MetricServiceClient()
-dashboard_client = monitoring_dashboard_v1.DashboardsServiceClient()
-
-
-def record_metric(project_name, metric):
-    """Create TS entry for captured metric"""
-
-    global logger
-    global now
-    global monitoring_client
-
-    series = monitoring_v3.TimeSeries()
-
-    series.resource.type = "global"
-    series.metric.type = f"custom.googleapis.com/{metric['Benchmark']}"
-
-    # Required to maintain uniqueness of each permutation
-    series.metric.labels["Run"] = metric["Run"]
-    series.metric.labels["Iteration"] = metric["Iteration"]
-
-    # Populate dynamic labels, GCP limit is 10 labels per descriptor
-    for key in list(metric.keys()):
-        if key not in ["Benchmark", "Run", "Iteration", "Score"]:
-            series.metric.labels[key] = metric[key]
-
-            if len(series.metric.labels) > GCP_LABEL_LIMIT:
-                logger.warn(
-                    f"Exiting metric label loop, limit of {GCP_LABEL_LIMIT} labels."
-                )
-                break  # Break out, we have hit limit on labels
-
-    seconds = int(now)
-    nanos = int((now - seconds) * 10 ** 9)
-
-    interval = monitoring_v3.TimeInterval(
-        {"end_time": {"seconds": seconds, "nanos": nanos}}
-    )
-
-    point = monitoring_v3.Point(
-        {"interval": interval, "value": {"double_value": float(metric["Score"])}}
-    )
-
-    series.points = [point]
-
-    logger.info(
-        f"Publishing {series.resource.type}/{series.metric.type}: {metric['Score']}"
-    )
-
-    try:
-        monitoring_client.create_time_series(name=project_name, time_series=[series])
-    except InternalServerError:
-        logger.error(
-            f"Failed to publish metric {series.metric.type}, this may be because the metric descriptor has been recently created. Will retry on the next run."
-        )
-
-
-def get_dashboard(project_name, title, index=1):
-    """Attempt to retrieve a dashboard and return the JSON"""
-
-    global logger
-
-    dashboard_request = monitoring_dashboard_v1.types.GetDashboardRequest(
-        name=f"{project_name}/dashboards/{title}-{index}"
-    )
-
-    try:
-        dashboard = dashboard_client.get_dashboard(request=dashboard_request)
-
-        logger.info(f"Found dashboard {project_name}/dashboards/{title}-{index}.")
-
-        return dashboard
-    except NotFound:
-        logger.info(
-            f"Dashboard {project_name}/dashboards/{title}-{index} does not exist."
-        )
-
-        return None
-
-
-def generate_dashboard(
-    template_path, project_name, title, metrics, filter_keys, index=1
-):
-    """Generate JSON template and return Python object representation of template for later processing."""
-
-    global logger
-
-    logger.info(
-        f"Generating dashboard template {project_name}/dashboards/{title}-{index} with {len(metrics)} metrics."
-    )
-
-    file_loader = FileSystemLoader(template_path)
-    env = Environment(loader=file_loader)
-
-    template = env.get_template("benchmark_dashboard.json.j2")
-
-    dashboard_template = json.loads(
-        template.render(
-            dashboard_path=f"{project_name}/dashboards/{title}-{index}",
-            title=f"{title} ({index})",
-            metrics=metrics,
-            filter_keys=filter_keys,
-            group_by_keys=["Iteration"],
-        )
-    )
-
-    return dashboard_template
-
-
-def publish_dashboards(project_name, title, dashboard_templates):
-    """Populate JSON dashboard template and use it to create/update a GCP Dashboard in project."""
-
-    global logger
-
-    for idx, dashboard_template in enumerate(dashboard_templates):
-        # Create Dashboard PB
-        dashboard = monitoring_dashboard_v1.Dashboard(dashboard_template)
-
-        # Fetch dashboard to see if we need to create or update in place
-        existing_dashboard = get_dashboard(project_name, title, idx + 1)
-
-        if existing_dashboard is None:  # Create new dashboard
-            dashboard_request = monitoring_dashboard_v1.types.CreateDashboardRequest(
-                parent=project_name, dashboard=dashboard
-            )
-
-            logger.info(
-                f"Publishing new dashboard {project_name}/dashboards/{title}-{idx + 1}."
-            )
-
-            dashboard_client.create_dashboard(dashboard_request)
-
-        else:  # Update existing dashboard
-            # Ensure we target returned version of the dashboard
-            dashboard.etag = existing_dashboard.etag  # <-- Broke everything :(
-
-            dashboard_request = monitoring_dashboard_v1.types.UpdateDashboardRequest(
-                dashboard=dashboard
-            )
-
-            logger.info(
-                f"Updating dashboard {project_name}/dashboards/{title}-{idx + 1}."
-            )
-
-            dashboard_client.update_dashboard(dashboard_request)
-
-
-def get_metadata(path):
-    """Get GCP metadata object for requested path"""
-
-    global logger
-
-    logger.debug(f"Querying {path} from instance metadata service.")
-
-    url = f"http://metadata.google.internal/{path}"
-    headers = {"Metadata-Flavor": "Google"}
-
-    r = requests.get(url, headers=headers)
-
-    if r.status_code != 404:
-        return r.text
-    else:
-        return None
-
-
-def get_project_id():
-    """Retrieve GCP project from the instance metadata"""
-
-    global logger
-
-    logger.info("Attempting to query project ID from instance metadata service.")
-
-    return get_metadata("/computeMetadata/v1/project/numeric-project-id")
-
-
-@click.command(
-    help="Read in provided glob of FILES and generate custom metrics for historical benchmark data capture."
-)
-@click.option(
-    "--project-id", envvar="GCP_PROJECT_ID", default=None, help="Numeric GCP project ID"
-)
-@click.option(
-    "--metrics-per-dashboard",
-    default=40,
-    help="Maximum number of metrics per dashboard",
-)
-@click.option("--template-path", default="templates", help="Root of template path")
-@click.argument("files", nargs=-1)
-def main(project_id, metrics_per_dashboard, template_path, files):
-    """Read in CSV and push custom metrics to project"""
-
-    global logger
-
-    logger.info("Starting metrics capture and dashboard creation.")
-
-    if project_id is None:
-        project_id = get_project_id()
-
-    project_name = f"projects/{project_id}"
-
-    logger.info(f'Targeting GCP project "{project_name}"')
-
-    for f in files:
-        metrics = []
-        metric_keys = []
-
-        with open(f, "r") as data:
-            logger.info(f"Reading {f}...")
-            for metric in csv.DictReader(data):
-                # We only need a single iteration of each run to aggregate keys for widgets
-                if metric["Iteration"] == "1":
-                    # Append keys to listing of keys to be used later in dashboard creation
-                    metric_keys += metric.keys()
-                    # Again don't need every iteration to create the widgets
-                    metrics.append(metric)
-
-                # Commit the metric timeseries to GCP services
-                record_metric(project_name, metric)
-
-        # Extract Dashboard name from filename
-        dashboard_title = os.path.basename(f).split("-")[0]
-
-        # Squash key list
-        metric_keys = set(metric_keys)
-        # Remove keys that will NOT be used for creating metric filter in the dashboard generation
-        filter_keys = list(
-            metric_keys - set(["Benchmark", "Run", "Iteration", "Score"])
-        )[
-            :9
-        ]  # Limit to first 9 keys
-
-        if metrics_per_dashboard > DASHBOARD_METRIC_CEILING:
-            logger.warning(
-                "Metrics per dashboard can not exceed 40 per GCP API limitations. Reset value to 40."
-            )
-            metrics_per_dashboard = DASHBOARD_METRIC_CEILING
-
-        elif metrics_per_dashboard < DASHBOARD_METRIC_FLOOR:
-            logger.warning("Metrics per dashboard below 10. Reset value to 10.")
-            metrics_per_dashboard = DASHBOARD_METRIC_FLOOR
-
-        # Generate dashboard templates
-        dashboard_templates = []
-        windows_size = len(metrics) + (metrics_per_dashboard - 1)
-        windows = windows_size // metrics_per_dashboard
-        for i in range(windows):
-            metrics_slice = metrics[
-                (i * metrics_per_dashboard) : (i + 1) * metrics_per_dashboard
-            ]
-
-            dashboard_templates.append(
-                generate_dashboard(
-                    template_path,
-                    project_name,
-                    dashboard_title,
-                    metrics_slice,
-                    filter_keys,
-                    index=i + 1,
-                )
-            )
-
-        # Publish dashboards to GCP
-        publish_dashboards(project_name, dashboard_title, dashboard_templates)
-
-    logger.info("Completed metrics capture and dashboard creation.")
-
-
-if __name__ == "__main__":
-    main()
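For reference, a minimal sketch of the dashboard windowing performed in `main()` above: metrics are split into slices of at most `metrics_per_dashboard` entries (40 by default, the GCP ceiling), and one dashboard is generated per slice. The metric count used below is hypothetical.

```python
def window_metrics(metrics, metrics_per_dashboard=40):
    # Ceiling division: how many dashboards are needed to hold every metric.
    windows = (len(metrics) + metrics_per_dashboard - 1) // metrics_per_dashboard
    return [
        metrics[i * metrics_per_dashboard : (i + 1) * metrics_per_dashboard]
        for i in range(windows)
    ]

# 95 hypothetical metrics land on dashboards of 40, 40, and 15 metrics.
print([len(s) for s in window_metrics(list(range(95)))])  # [40, 40, 15]
```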
diff --git a/.github/tools/metrics/requirements.txt b/.github/tools/metrics/requirements.txt
deleted file mode 100644
index 662712863a9..00000000000
--- a/.github/tools/metrics/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-click==7.1.2
-google-cloud-monitoring==2.2.1
-google-cloud-monitoring-dashboards==2.0.0
-requests==2.25.1
-jinja2==2.11.3
\ No newline at end of file
diff --git a/.github/tools/metrics/templates/benchmark_dashboard.json.j2 b/.github/tools/metrics/templates/benchmark_dashboard.json.j2
deleted file mode 100644
index 5ec9fd8e4ce..00000000000
--- a/.github/tools/metrics/templates/benchmark_dashboard.json.j2
+++ /dev/null
@@ -1,43 +0,0 @@
-{
-  "name": "{{ dashboard_path }}",
-  "display_name": "{{ title }}",
-  "grid_layout": {
-    "columns": 4,
-    "widgets": [
-      {% for metric in metrics -%}
-      {
-        "title": "{{ metric.Benchmark.split('.')[-1] }}|{% for k in filter_keys %}{{ k }}:{{ metric[k] }}{% if not loop.last %},{% endif -%}{% endfor %}|",
-        "xy_chart": {
-          "data_sets": [
-            {
-              "time_series_query": {
-                "time_series_filter": {
-                  "filter": "metric.type=\"custom.googleapis.com/{{ metric.Benchmark }}\" resource.type=\"global\" {% for k in filter_keys %}metric.label.\"{{ k }}\"=\"{{ metric[k] }}\"{% if not loop.last %} {% endif -%}{% endfor %}",
-                  "aggregation": {
-                    "alignment_period": {"seconds": 86400, "nanos": 0},
-                    "per_series_aligner": "ALIGN_MEAN",
-                    "cross_series_reducer": "REDUCE_MEAN"
-                  },
-                  "secondary_aggregation": {
-                    "alignment_period": {"seconds": 86400, "nanos": 0}
-                  }
-                }
-              },
-              "plot_type": "LINE",
-              "min_alignment_period": {"seconds": 86400, "nanos": 0}
-            }
-          ],
-          "timeshift_duration": {"seconds": 604800, "nanos": 0},
-          "y_axis": {
-            "label": "y1Axis",
-            "scale": "LINEAR"
-          },
-          "chart_options": {
-            "mode": "COLOR"
-          }
-        }
-      }{% if not loop.last %},{% endif -%}
-      {% endfor -%}
-    ]
-  }
-}
\ No newline at end of file
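A minimal sketch of how `generate_dashboard()` in `benchmark_capture.py` fed this template through Jinja2; the project number, dashboard title, metric row, and filter key below are hypothetical.

```python
import json
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader("templates"))
template = env.get_template("benchmark_dashboard.json.j2")

# Hypothetical inputs mirroring what generate_dashboard() passed to the template.
metrics = [{"Benchmark": "io.deephaven.SomeBenchmark.read", "Score": "123.4", "Mode": "parquet"}]
dashboard = json.loads(
    template.render(
        dashboard_path="projects/12345/dashboards/bench-1",
        title="bench (1)",
        metrics=metrics,
        filter_keys=["Mode"],
        group_by_keys=["Iteration"],
    )
)
print(dashboard["display_name"])  # -> bench (1)
```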