Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable querying cumulative metrics with agg_time_dimension #999

Merged
merged 13 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class CumulativeMetricRequiresMetricTimeIssue(MetricFlowQueryResolutionIssue):
def ui_description(self, associated_input: MetricFlowQueryResolverInput) -> str:
return (
f"The query includes a cumulative metric {repr(self.metric_reference.element_name)} that does not "
f"accumulate over all-time, but the group-by items do not include {repr(METRIC_TIME_ELEMENT_NAME)}"
f"accumulate over all-time, but the group-by items do not include {repr(METRIC_TIME_ELEMENT_NAME)} "
"or the metric's agg_time_dimension."
)

@override
Expand Down
53 changes: 26 additions & 27 deletions metricflow/query/validation_rules/metric_time_requirements.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
from __future__ import annotations

from typing import List, Sequence
from typing import Sequence

from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
from dbt_semantic_interfaces.naming.keywords import METRIC_TIME_ELEMENT_NAME
from dbt_semantic_interfaces.protocols import WhereFilterIntersection
from dbt_semantic_interfaces.references import MetricReference
from dbt_semantic_interfaces.type_enums import MetricType, TimeGranularity
from dbt_semantic_interfaces.type_enums.date_part import DatePart
from dbt_semantic_interfaces.references import MetricReference, TimeDimensionReference
from dbt_semantic_interfaces.type_enums import MetricType
from typing_extensions import override

from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup
Expand All @@ -34,29 +33,11 @@ class MetricTimeQueryValidationRule(PostResolutionQueryValidationRule):
def __init__(self, manifest_lookup: SemanticManifestLookup) -> None: # noqa: D
super().__init__(manifest_lookup=manifest_lookup)

metric_time_specs: List[TimeDimensionSpec] = []

for time_granularity in TimeGranularity:
metric_time_specs.append(
TimeDimensionSpec(
element_name=METRIC_TIME_ELEMENT_NAME,
entity_links=(),
time_granularity=time_granularity,
date_part=None,
)
self._metric_time_specs = tuple(
TimeDimensionSpec.generate_possible_specs_for_time_dimension(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice.

time_dimension_reference=TimeDimensionReference(element_name=METRIC_TIME_ELEMENT_NAME), entity_links=()
)
for date_part in DatePart:
for time_granularity in date_part.compatible_granularities:
metric_time_specs.append(
TimeDimensionSpec(
element_name=METRIC_TIME_ELEMENT_NAME,
entity_links=(),
time_granularity=time_granularity,
date_part=date_part,
)
)

self._metric_time_specs = tuple(metric_time_specs)
)

def _group_by_items_include_metric_time(self, query_resolver_input: ResolverInputForQuery) -> bool:
for group_by_item_input in query_resolver_input.group_by_item_inputs:
Expand All @@ -65,6 +46,18 @@ def _group_by_items_include_metric_time(self, query_resolver_input: ResolverInpu

return False

def _group_by_items_include_agg_time_dimension(
self, query_resolver_input: ResolverInputForQuery, metric_reference: MetricReference
) -> bool:
valid_agg_time_dimension_specs = self._manifest_lookup.metric_lookup.get_valid_agg_time_dimensions_for_metric(
metric_reference
)
for group_by_item_input in query_resolver_input.group_by_item_inputs:
if group_by_item_input.spec_pattern.matches_any(valid_agg_time_dimension_specs):
return True

return False

@override
def validate_metric_in_resolution_dag(
self,
Expand All @@ -74,14 +67,20 @@ def validate_metric_in_resolution_dag(
) -> MetricFlowQueryResolutionIssueSet:
metric = self._get_metric(metric_reference)
query_includes_metric_time = self._group_by_items_include_metric_time(resolver_input_for_query)
query_includes_metric_time_or_agg_time_dimension = (
query_includes_metric_time
or self._group_by_items_include_agg_time_dimension(
query_resolver_input=resolver_input_for_query, metric_reference=metric_reference
)
)

if metric.type is MetricType.SIMPLE or metric.type is MetricType.CONVERSION:
return MetricFlowQueryResolutionIssueSet.empty_instance()
elif metric.type is MetricType.CUMULATIVE:
if (
metric.type_params is not None
and (metric.type_params.window is not None or metric.type_params.grain_to_date is not None)
and not query_includes_metric_time
and not query_includes_metric_time_or_agg_time_dimension
):
return MetricFlowQueryResolutionIssueSet.from_issue(
CumulativeMetricRequiresMetricTimeIssue.from_parameters(
Expand Down
17 changes: 2 additions & 15 deletions metricflow/test/query/test_query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,28 +460,15 @@ def test_cumulative_metric_wrong_time_dimension_validation() -> None:


def test_cumulative_metric_agg_time_dimension_name_validation() -> None:
"""Test that queries for cumulative metrics fail if the agg_time_dimension is selected by name.

Currently, cumulative metrics only return correct results if the query includes the `metric_time` virtual
dimension. In many cases the underlying agg_time_dimension is a single column and users will use it
directly instead of requesting metric_time. While shis should be fine, we cannot allow it until we fix
the query rendering issues that prevent this from working correctly.

This is a test of validation enforcement to ensure users don't get incorrect results due to current
limitations, and should be deleted or updated when this restriction is lifted.
"""
"""Test that queries for cumulative metrics succeed if the agg_time_dimension is selected by name."""
bookings_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=BOOKINGS_YAML)
metrics_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=METRICS_YAML)
revenue_yaml_file = YamlConfigFile(filepath="inline_for_test_1", contents=REVENUE_YAML)
query_parser = query_parser_from_yaml(
[EXAMPLE_PROJECT_CONFIGURATION_YAML_CONFIG_FILE, bookings_yaml_file, revenue_yaml_file, metrics_yaml_file]
)

with pytest.raises(InvalidQueryException, match="do not include 'metric_time'"):
query_parser.parse_and_validate_query(
metric_names=["revenue_cumulative"],
group_by_names=["company__ds"],
)
query_parser.parse_and_validate_query(metric_names=["revenue_cumulative"], group_by_names=["company__ds"])


def test_derived_metric_query_parsing() -> None:
Expand Down