General query result returns tuples from iter_tuples method.
Special NamedTuple class documents the items in the returned tuples.
andy-slac committed Aug 8, 2024
1 parent 12f5406 commit 3dc968e
Showing 3 changed files with 48 additions and 27 deletions.
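
For context, a minimal usage sketch of the new API (not part of this commit; `results` is assumed to be a `GeneralQueryResults` instance and `dataset_type` a `DatasetType` obtained elsewhere):

    # Hypothetical caller of the new iter_tuples() API (names assumed).
    for data_id, refs, row in results.iter_tuples(dataset_type):
        # data_id is the DataCoordinate covering all dimensions of the query.
        ref = refs[0]  # one DatasetRef per requested dataset type, in argument order
        run = row[f"{dataset_type.name}.run"]  # raw column values remain accessible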
66 changes: 44 additions & 22 deletions python/lsst/daf/butler/queries/_general_query_results.py
@@ -27,10 +27,10 @@

from __future__ import annotations

__all__ = ("GeneralQueryResults",)
__all__ = ("GeneralQueryResults", "GeneralResultTuple")

from collections.abc import Iterator
from typing import Any, final
from typing import Any, NamedTuple, final

from .._dataset_ref import DatasetRef
from .._dataset_type import DatasetType
@@ -41,6 +41,25 @@
from .tree import QueryTree


class GeneralResultTuple(NamedTuple):
"""Helper class for general result that represents the result row as a
data coordinate and optionally a set of dataset refs extracted from a row.
"""

data_id: DataCoordinate
"""Data coordinate for current row."""

refs: list[DatasetRef]
"""Dataset refs extracted from the current row, the order matches the order
of arguments in ``iter_tuples`` call."""

raw_row: dict[str, Any]
"""Original result row, the keys are the names of the dimensions,
dimension fields (separated from dimension by dot) or dataset type fields
(separated from dataset type name by dot).
"""


@final
class GeneralQueryResults(QueryResultsBase):
"""A query for `DatasetRef` results with a single dataset type.
@@ -74,7 +93,7 @@ def __iter__(self) -> Iterator[dict[str, Any]]:
Yields
------
row_dict : `dict` [`str`, `Any`]
Result row as dictionary, the keys the names of the dimensions,
Result row as dictionary, the keys are the names of the dimensions,
dimension fields (separated from dimension by dot) or dataset type
fields (separated from dataset type name by dot).
"""
@@ -83,33 +102,36 @@ def __iter__(self) -> Iterator[dict[str, Any]]:
for row in page.rows:
yield dict(zip(columns, row))

def iter_refs(self, dataset_type: DatasetType) -> Iterator[tuple[DatasetRef, dict[str, Any]]]:
"""Iterate over result rows and return DatasetRef constructed from each
row and an original row.
def iter_tuples(self, *dataset_types: DatasetType) -> Iterator[GeneralResultTuple]:
"""Iterate over result rows and return the data coordinate, the dataset
refs constructed from each row, and the original row.
Parameters
----------
dataset_type : `DatasetType`
Type of the dataset to return.
*dataset_types : `DatasetType`
Zero or more types of the datasets to return.
Yields
------
dataset_ref : `DatasetRef`
Dataset reference.
row_dict : `dict` [`str`, `Any`]
Result row as dictionary, the keys the names of the dimensions,
dimension fields (separated from dimension by dot) or dataset type
fields (separated from dataset type name by dot).
row_tuple : `GeneralResultTuple`
Structure containing data coordinate, refs, and a copy of the row.
"""
dimensions = dataset_type.dimensions
id_key = f"{dataset_type.name}.dataset_id"
run_key = f"{dataset_type.name}.run"
data_id_keys = dimensions.required
all_dimensions = self._spec.dimensions
dataset_keys = []
for dataset_type in dataset_types:
dimensions = dataset_type.dimensions
id_key = f"{dataset_type.name}.dataset_id"
run_key = f"{dataset_type.name}.run"
dataset_keys.append((dataset_type, dimensions, id_key, run_key))
for row in self:
values = tuple(row[key] for key in data_id_keys)
data_id = DataCoordinate.from_required_values(dimensions, values)
ref = DatasetRef(dataset_type, data_id, row[run_key], id=row[id_key])
yield ref, row
values = tuple(row[key] for key in all_dimensions.required)
data_coordinate = DataCoordinate.from_required_values(all_dimensions, values)
refs = []
for dataset_type, dimensions, id_key, run_key in dataset_keys:
values = tuple(row[key] for key in dimensions.required)
data_id = DataCoordinate.from_required_values(dimensions, values)
refs.append(DatasetRef(dataset_type, data_id, row[run_key], id=row[id_key]))
yield GeneralResultTuple(data_id=data_coordinate, refs=refs, raw_row=row)

@property
def dimensions(self) -> DimensionGroup:
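Callers of the removed iter_refs() method migrate to iter_tuples() as the two registry files below do; a minimal sketch of the pattern (names assumed):

    # Before (removed in this commit):
    #     for ref, row_dict in results.iter_refs(dataset_type):
    #         ...
    # After: the single requested type's ref is the first element of `refs`.
    for _, refs, row_dict in results.iter_tuples(dataset_type):
        ref = refs[0]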
5 changes: 2 additions & 3 deletions python/lsst/daf/butler/registry/sql_registry.py
@@ -2504,9 +2504,8 @@ def queryDatasetAssociations(
)
timespan_key = f"{datasetType.name}.timespan"
collection_key = f"{datasetType.name}.collection"
for ref, row_dict in result.iter_refs(datasetType):
_LOG.debug("row_dict: %s", row_dict)
yield DatasetAssociation(ref, row_dict[collection_key], row_dict[timespan_key])
for _, refs, row_dict in result.iter_tuples(datasetType):
yield DatasetAssociation(refs[0], row_dict[collection_key], row_dict[timespan_key])

def get_datastore_records(self, ref: DatasetRef) -> DatasetRef:
"""Retrieve datastore records for given ref.
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/remote_butler/_registry.py
@@ -527,8 +527,8 @@ def queryDatasetAssociations(
)
timespan_key = f"{datasetType.name}.timespan"
collection_key = f"{datasetType.name}.collection"
for ref, row_dict in result.iter_refs(datasetType):
yield DatasetAssociation(ref, row_dict[collection_key], row_dict[timespan_key])
for _, refs, row_dict in result.iter_tuples(datasetType):
yield DatasetAssociation(refs[0], row_dict[collection_key], row_dict[timespan_key])

@property
def storageClasses(self) -> StorageClassFactory:
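Because `GeneralResultTuple` is a `NamedTuple`, the positional unpacking used in both registry methods above could equally use the documented field names; a sketch under the same assumptions (same local variable names as the surrounding code):

    for row_tuple in result.iter_tuples(datasetType):
        yield DatasetAssociation(
            row_tuple.refs[0],
            row_tuple.raw_row[collection_key],
            row_tuple.raw_row[timespan_key],
        )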
