From 7241cb0428e4e0fc121f340866d787b94d1f7327 Mon Sep 17 00:00:00 2001 From: Ben Wu <12437227+BenWu@users.noreply.github.com> Date: Tue, 18 Jun 2024 18:15:00 -0400 Subject: [PATCH] Generate event monitoring view for all apps with events ping (#5799) --- .../glean_usage/event_monitoring_live.py | 40 +++++++------ .../event_monitoring_live.metadata.yaml | 2 +- .../templates/event_monitoring_live.view.sql | 56 ++++++++++--------- .../event_monitoring_live_v1.metadata.yaml | 2 +- 4 files changed, 55 insertions(+), 45 deletions(-) diff --git a/sql_generators/glean_usage/event_monitoring_live.py b/sql_generators/glean_usage/event_monitoring_live.py index 628e228d760..233e921f5e7 100644 --- a/sql_generators/glean_usage/event_monitoring_live.py +++ b/sql_generators/glean_usage/event_monitoring_live.py @@ -1,10 +1,10 @@ """Generate Materialized Views and aggregate queries for event monitoring.""" import os -from collections import namedtuple +from collections import namedtuple, OrderedDict from datetime import datetime from pathlib import Path -from typing import Set +from typing import List, Set import requests @@ -39,7 +39,17 @@ def __init__(self) -> None: self.custom_render_kwargs = {} self.base_table_name = "events_v1" - def _get_tables_with_events(self, v1_name: str) -> Set[str]: + def _get_prod_datasets_with_event(self) -> List[str]: + """Get glean datasets with an events table in generated schemas.""" + return [ + s.bq_dataset_family + for s in get_stable_table_schemas() + if s.schema_id == "moz://mozilla.org/schemas/glean/ping/1" + and s.bq_table == "events_v1" + ] + + def _get_tables_with_events(self, v1_name: str, bq_dataset_name: str) -> Set[str]: + """Get tables for the given app that receive event type metrics.""" pings = set() resp = requests.get(METRICS_INFO_URL.format(app_name=v1_name)) resp.raise_for_status() @@ -50,6 +60,9 @@ def _get_tables_with_events(self, v1_name: str) -> Set[str]: latest_history = metric.get("history", [])[-1] pings.update(latest_history.get("send_in_pings", [])) + if bq_dataset_name in self._get_prod_datasets_with_event(): + pings.add("events") + return pings def generate_per_app_id( @@ -89,7 +102,7 @@ def generate_per_app_id( for app_dataset in app if dataset == app_dataset["bq_dataset_family"] ][0] - events_tables = self._get_tables_with_events(v1_name) + events_tables = self._get_tables_with_events(v1_name, dataset) events_tables = [ f"{ping.replace('-', '_')}_v1" for ping in events_tables @@ -115,7 +128,7 @@ def generate_per_app_id( for app_dataset in app if dataset == app_dataset["bq_dataset_family"] ][0], - events_tables=events_tables, + events_tables=sorted(events_tables), ) render_kwargs.update(self.custom_render_kwargs) @@ -162,13 +175,6 @@ def generate_across_apps( if not self.across_apps_enabled: return - prod_datasets_with_event = [ - s.bq_dataset_family - for s in get_stable_table_schemas() - if s.schema_id == "moz://mozilla.org/schemas/glean/ping/1" - and s.bq_table == "events_v1" - ] - aggregate_table = "event_monitoring_aggregates_v1" target_view_name = "_".join(self.target_table_id.split("_")[:-1]) @@ -176,7 +182,7 @@ def generate_across_apps( "generate", "glean_usage", "events_monitoring", "events_tables", fallback={} ) - event_tables_per_dataset = {} + event_tables_per_dataset = OrderedDict() for app in apps: for app_dataset in app: @@ -201,7 +207,9 @@ def generate_across_apps( ][0] event_tables = [ f"{ping.replace('-', '_')}_v1" - for ping in self._get_tables_with_events(v1_name) + for ping in self._get_tables_with_events( + v1_name, app_dataset["bq_dataset_family"] + ) if ping not in ConfigLoader.get( "generate", "glean_usage", "events_monitoring", "skip_pings" @@ -209,7 +217,7 @@ def generate_across_apps( ] if len(event_tables) > 0: - event_tables_per_dataset[dataset] = event_tables + event_tables_per_dataset[dataset] = sorted(event_tables) render_kwargs = dict( header="-- Generated via bigquery_etl.glean_usage\n", @@ -219,7 +227,7 @@ def generate_across_apps( table=target_view_name, target_table=f"{TARGET_DATASET_CROSS_APP}_derived.{aggregate_table}", apps=apps, - prod_datasets=prod_datasets_with_event, + prod_datasets=self._get_prod_datasets_with_event(), event_tables_per_dataset=event_tables_per_dataset, ) render_kwargs.update(self.custom_render_kwargs) diff --git a/sql_generators/glean_usage/templates/event_monitoring_live.metadata.yaml b/sql_generators/glean_usage/templates/event_monitoring_live.metadata.yaml index 32e0b9ee2cf..5ce949be73a 100644 --- a/sql_generators/glean_usage/templates/event_monitoring_live.metadata.yaml +++ b/sql_generators/glean_usage/templates/event_monitoring_live.metadata.yaml @@ -1,6 +1,6 @@ friendly_name: Event Monitoring Live description: |- - View that combines live and aggregated event monitoring data for {{ dataset_id }} + View that combines live and aggregated event monitoring data for all glean apps. owners: - ascholtz@mozilla.com - akomar@mozilla.com diff --git a/sql_generators/glean_usage/templates/event_monitoring_live.view.sql b/sql_generators/glean_usage/templates/event_monitoring_live.view.sql index 846ab2891d0..23b3d095446 100644 --- a/sql_generators/glean_usage/templates/event_monitoring_live.view.sql +++ b/sql_generators/glean_usage/templates/event_monitoring_live.view.sql @@ -1,8 +1,31 @@ -CREATE OR REPLACE VIEW `{{ project_id }}.{{ target_view }}` AS -{% for app in apps %} -{% set outer_loop = loop -%} -{% for dataset in app -%} -{% if dataset['bq_dataset_family'] in prod_datasets %} +CREATE OR REPLACE VIEW + `{{ project_id }}.{{ target_view }}` AS + {% for app in apps %} + {% set outer_loop = loop -%} + {% for dataset in app -%} + {% if dataset['bq_dataset_family'] in prod_datasets + and dataset['bq_dataset_family'] in event_tables_per_dataset %} + SELECT + window_start, + window_end, + event_category, + event_name, + event_extra_key, + country, + normalized_app_name, + channel, + version, + experiment, + experiment_branch, + total_events + FROM + `{{ project_id }}.{{ dataset['bq_dataset_family'] }}_derived.event_monitoring_live_v1` + WHERE + submission_date > DATE_SUB(CURRENT_DATE(), INTERVAL 2 DAY) + UNION ALL + {% endif %} + {% endfor %} + {% endfor %} SELECT window_start, window_end, @@ -16,28 +39,7 @@ SELECT experiment, experiment_branch, total_events -FROM - `{{ project_id }}.{{ dataset['bq_dataset_family'] }}_derived.event_monitoring_live_v1` -WHERE - submission_date > DATE_SUB(CURRENT_DATE(), INTERVAL 2 DAY) -UNION ALL -{% endif %} -{% endfor %} -{% endfor %} -SELECT - window_start, - window_end, - event_category, - event_name, - event_extra_key, - country, - normalized_app_name, - channel, - version, - experiment, - experiment_branch, - total_events -FROM +FROM `{{ project_id }}.{{ target_table }}` WHERE submission_date <= DATE_SUB(CURRENT_DATE(), INTERVAL 2 DAY) diff --git a/sql_generators/glean_usage/templates/event_monitoring_live_v1.metadata.yaml b/sql_generators/glean_usage/templates/event_monitoring_live_v1.metadata.yaml index 697c29f1ed1..f5ac4f0c003 100644 --- a/sql_generators/glean_usage/templates/event_monitoring_live_v1.metadata.yaml +++ b/sql_generators/glean_usage/templates/event_monitoring_live_v1.metadata.yaml @@ -1,7 +1,7 @@ friendly_name: Event Monitoring Live description: |- Materialized view of experimentation related events - coming from {{ dataset_id }}. + coming from {{ dataset }}. owners: - ascholtz@mozilla.com - akomar@mozilla.com