Skip to content

Commit

Permalink
add docs and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
villebro committed Sep 3, 2024
1 parent a5bb398 commit 1a8757b
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 25 deletions.
55 changes: 53 additions & 2 deletions docs/docs/configuration/sql-templating.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ made available in the Jinja context:

- `columns`: columns which to group by in the query
- `filter`: filters applied in the query
- `from_dttm`: start `datetime` value from the selected time range (`None` if undefined)
- `to_dttm`: end `datetime` value from the selected time range (`None` if undefined)
- `from_dttm`: start `datetime` value from the selected time range (`None` if undefined) (deprecated, use `get_time_filter` instead)
- `to_dttm`: end `datetime` value from the selected time range (`None` if undefined). (deprecated, use `get_time_filter` instead)
- `groupby`: columns which to group by in the query (deprecated)
- `metrics`: aggregate expressions in the query
- `row_limit`: row limit of the query
Expand Down Expand Up @@ -346,6 +346,57 @@ Here's a concrete example:
order by lineage, level
```
**Time Filter for a Specific Column**
The `{{ get_time_filter() }}` macro returns the time filter applied to a specific column. The macro takes the following
parameters:
- `column`: Name of the temporal column. Leave undefined to reference the time range from a Dashboard Native Time Range
filter (when present).
- `target_type`: The target temporal type as recognized by the target database (e.g. `TIMESTAMP`, `DATE` or
`DATETIME`). If `column` is defined, the format will default to the type of the column. This is used to produce
the format of the `from_dttm` and `to_dttm` properties of the returned `TimeFilter` object. Note, that omitting
`column` and `target_type` will render format the temporal values as ISO format.
The return type has the following properties:
- `since`: the start of the time filter (if any)
- `until`: the end of the time filter (if any)
- `time_range`: The time range as defined on the
Here's a concrete example using the `logs` table from the Superset metastore:
```
{% set time_filter = get_time_filter("dttm", remove_filter=True) %}
{% set since = time_filter.since %}
{% set until = time_filter.until %}
{% set time_range = time_filter.time_range %}

SELECT
*,
{% if time_range %}'{{ time_range }}'{% else %}''{% endif %} as time_range
FROM logs
{% if since or until %}WHERE 1 = 1
{% if since %}AND dttm >= {{ since }}{% endif %}
{% if until %}AND dttm < {{ until }}{% endif %}
{% endif %}
```
Assuming we are creating a table chart with a simple `COUNT(*)` as the metric with a time filter `Last week` on the
`dttm` column, this would render the following query on Postgres (note the formatting of the temporal filters, and
the missing filters on the outer query):
```
SELECT COUNT(*) AS count
FROM
(SELECT *,
'Last week' AS time_range
FROM public.logs
WHERE 1 = 1
AND dttm >= TO_TIMESTAMP('2024-08-27 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')
AND dttm < TO_TIMESTAMP('2024-09-03 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')) AS virtual_table
ORDER BY count DESC
LIMIT 1000;
```
**Datasets**
It's possible to query physical and virtual datasets using the `dataset` macro. This is useful if you've defined computed columns and metrics on your datasets, and want to reuse the definition in adhoc SQL Lab queries.
Expand Down
1 change: 0 additions & 1 deletion superset/common/query_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,6 @@ def to_dict(self) -> dict[str, Any]:
"series_limit_metric": self.series_limit_metric,
"to_dttm": self.to_dttm,
"time_shift": self.time_shift,
"time_range": self.time_range,
}
return query_object_dict

Expand Down
57 changes: 37 additions & 20 deletions superset/jinja_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,9 @@ class TimeFilter:
"""
Container for temporal filter.
"""
from_dttm: str | None
to_dttm: str | None

since: str | None
until: str | None
time_range: str | None


Expand Down Expand Up @@ -119,15 +120,13 @@ def __init__( # pylint: disable=too-many-arguments
removed_filters: Optional[list[str]] = None,
database: Optional[Database] = None,
dialect: Optional[Dialect] = None,
time_range: Optional[str] = None,
table: Optional[SqlaTable] = None,
):
self.extra_cache_keys = extra_cache_keys
self.applied_filters = applied_filters if applied_filters is not None else []
self.removed_filters = removed_filters if removed_filters is not None else []
self.database = database
self.dialect = dialect
self.time_range = time_range or NO_TIME_RANGE
self.table = table

def current_user_id(self, add_to_cache_keys: bool = True) -> Optional[int]:
Expand Down Expand Up @@ -382,25 +381,44 @@ def get_time_filter(
target_type: str | None = None,
remove_filter: bool = False,
) -> TimeFilter:
"""Get the applied time filter with appropriate formatting,
either for a specific column, or whichever time filter is being emitted
"""Get the time filter with appropriate formatting,
either for a specific column, or whichever time range is being emitted
from a dashboard.
This is useful if you want to handle time filters inside the virtual dataset,
as by default the time filter is placed on the outer query. This can have
significant performance implications on certain query engines, like Druid.
considerable performance implications, as many databases and query engines
are able to optimize the query better if the temporal filter is placed on the
inner query, as opposded to the outer query.
Usage example::
{% set time_filter = get_time_filter("dttm", remove_filter=True) %}
{% set since = time_filter.since %}
{% set until = time_filter.until %}
{% set time_range = time_filter.time_range %}
select *,
{% if time_range %}'{{ time_range }}'{% else %}''{% endif %} as time_range
from logs
{% if since or until %}where 1 = 1
{% if since %}and dttm >= {{ since }}{% endif %}
{% if until %}and dttm < {{ until }}{% endif %}
{% endif %}
This will render the time filter inside the virtual dataset subquery with the
appropriate formatting, and remove it from the outer query.
:param column: Name of the temporal column. Leave undefined to reference the
time range from a Dashboard Native Time Range filter (when present).
:param target_type: The target temporal type. If `column` is defined, will
default to the type of the column. This is used to produce the format
of the `from_dttm` and `to_dttm` properties of the returned `TimeFilter`
object.
:param target_type: The target temporal type as recognized by the target
database (e.g. `TIMESTAMP`, `DATE` or `DATETIME`). If `column` is defined,
the format will default to the type of the column. This is used to produce
the format of the `since` and `until` properties of the returned
`TimeFilter` object. Note, that omitting `column` and `target_type` will
render format the temporal values as ISO format.
:param remove_filter: When set to true, mark the filter as processed,
removing it from the outer query. Useful when a filter should
only apply to the inner query
only apply to the inner query.
:return: The corresponding time filter.
"""
# pylint: disable=import-outside-toplevel
Expand All @@ -409,6 +427,7 @@ def get_time_filter(
form_data, _ = get_form_data()
convert_legacy_filters_into_adhoc(form_data)
merge_extra_filters(form_data)
time_range = form_data.get("time_range")
if column:
flt: AdhocFilterClause | None = next(

Check warning on line 432 in superset/jinja_context.py

View check run for this annotation

Codecov / codecov/patch

superset/jinja_context.py#L427-L432

Added lines #L427 - L432 were not covered by tests
(
Expand All @@ -429,23 +448,22 @@ def get_time_filter(
time_range = cast(str, flt["comparator"])
if not target_type and self.table:
target_type = self.table.columns_types.get(column)

Check warning on line 450 in superset/jinja_context.py

View check run for this annotation

Codecov / codecov/patch

superset/jinja_context.py#L448-L450

Added lines #L448 - L450 were not covered by tests
else:
time_range = self.time_range
else:
time_range = self.time_range

from_dttm, to_dttm = get_since_until_from_time_range(time_range)
time_range = time_range or NO_TIME_RANGE
since, until = get_since_until_from_time_range(time_range)

Check warning on line 453 in superset/jinja_context.py

View check run for this annotation

Codecov / codecov/patch

superset/jinja_context.py#L452-L453

Added lines #L452 - L453 were not covered by tests

def _format_dttm(dttm: datetime | None) -> str | None:
if target_type is None and dttm:
return dttm.isoformat()
return (

Check warning on line 458 in superset/jinja_context.py

View check run for this annotation

Codecov / codecov/patch

superset/jinja_context.py#L455-L458

Added lines #L455 - L458 were not covered by tests
self.database.db_engine_spec.convert_dttm(target_type or "", dttm)
if self.database and dttm
else None
)

return TimeFilter(

Check warning on line 464 in superset/jinja_context.py

View check run for this annotation

Codecov / codecov/patch

superset/jinja_context.py#L464

Added line #L464 was not covered by tests
from_dttm=_format_dttm(from_dttm),
to_dttm=_format_dttm(to_dttm),
since=_format_dttm(since),
until=_format_dttm(until),
time_range=time_range,
)

Expand Down Expand Up @@ -624,7 +642,6 @@ def set_context(self, **kwargs: Any) -> None:
database=self._database,
dialect=self._database.get_dialect(),
table=self._table,
time_range=self._context.get("time_range"),
)

from_dttm = (
Expand Down
2 changes: 0 additions & 2 deletions superset/models/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1439,7 +1439,6 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
timeseries_limit: Optional[int] = None,
timeseries_limit_metric: Optional[Metric] = None,
time_shift: Optional[str] = None,
time_range: Optional[str] = None,
) -> SqlaQuery:
"""Querying any sqla table from this common interface"""
if granularity not in self.dttm_cols and granularity is not None:
Expand All @@ -1458,7 +1457,6 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
"time_column": granularity,
"time_grain": time_grain,
"to_dttm": to_dttm.isoformat() if to_dttm else None,
"time_range": time_range,
"table_columns": [col.column_name for col in self.columns],
"filter": filter,
}
Expand Down
143 changes: 143 additions & 0 deletions tests/unit_tests/jinja_context_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import Any

import pytest
from freezegun import freeze_time
from pytest_mock import MockerFixture
from sqlalchemy.dialects import mysql
from sqlalchemy.dialects.postgresql import dialect
Expand All @@ -32,6 +33,7 @@
ExtraCache,
metric_macro,
safe_proxy,
TimeFilter,
WhereInMacro,
)
from superset.models.core import Database
Expand Down Expand Up @@ -836,3 +838,144 @@ def test_metric_macro_no_dataset_id_with_context_chart_no_datasource_id(
)
mock_get_form_data.assert_called_once()
DatasetDAO.find_by_id.assert_not_called()


@pytest.mark.parametrize(
"args,kwargs,sqlalchemy_uri,queries,time_filter,removed_filters,applied_filters",
[
(
[],
{"target_type": "TIMESTAMP"},
"postgresql://",
[{}],
TimeFilter(
since=None,
until=None,
time_range="No filter",
),
[],
[],
),
(
[],
{"target_type": "TIMESTAMP"},
"postgresql://",
[{"time_range": "Last week"}],
TimeFilter(
since="TO_TIMESTAMP('2024-08-26 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')",
until="TO_TIMESTAMP('2024-09-02 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')",
time_range="Last week",
),
[],
[],
),
(
["dttm"],
{},
"postgresql://",
[
{
"filters": [
{
"col": "dttm",
"op": "TEMPORAL_RANGE",
"val": "Last week",
},
],
}
],
TimeFilter(
since="TO_TIMESTAMP('2024-08-26 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')",
until="TO_TIMESTAMP('2024-09-02 00:00:00.000000', 'YYYY-MM-DD HH24:MI:SS.US')",
time_range="Last week",
),
[],
["dttm"],
),
(
["dt"],
{"remove_filter": True},
"postgresql://",
[
{
"filters": [
{
"col": "dt",
"op": "TEMPORAL_RANGE",
"val": "Last week",
},
],
}
],
TimeFilter(
since="TO_DATE('2024-08-26', 'YYYY-MM-DD')",
until="TO_DATE('2024-09-02', 'YYYY-MM-DD')",
time_range="Last week",
),
["dt"],
["dt"],
),
(
["dttm"],
{"target_type": "DATE", "remove_filter": True},
"trino://",
[
{
"filters": [
{
"col": "dttm",
"op": "TEMPORAL_RANGE",
"val": "Last month",
},
],
}
],
TimeFilter(
since="DATE '2024-08-02'",
until="DATE '2024-09-02'",
time_range="Last month",
),
["dttm"],
["dttm"],
),
],
)
def test_get_time_filter(
args: list[Any],
kwargs: dict[str, Any],
sqlalchemy_uri: str,
queries: list[Any] | None,
time_filter: TimeFilter,
removed_filters: list[str],
applied_filters: list[str],
) -> None:
"""
Test the ``get_time_filter`` macro.
"""
columns = [
TableColumn(column_name="dt", is_dttm=1, type="DATE"),
TableColumn(column_name="dttm", is_dttm=1, type="TIMESTAMP"),
]

database = Database(database_name="my_database", sqlalchemy_uri=sqlalchemy_uri)
table = SqlaTable(
table_name="my_datast",
columns=columns,
main_dttm_col="dt",
database=database,
)

with (
freeze_time("2024-09-03"),
app.test_request_context(
json={"queries": queries},
),
):
cache = ExtraCache(
database=database,
table=table,
)

assert cache.get_time_filter(*args, **kwargs) == time_filter
assert cache.removed_filters == removed_filters
assert cache.applied_filters == applied_filters

0 comments on commit 1a8757b

Please sign in to comment.