Skip to content

Commit

Permalink
Handle collection query optimization parameters
Browse files Browse the repository at this point in the history
Update RemoteButlerCollections to pass the new optimization parameters added in DM-45993 to the server.
  • Loading branch information
dhirving committed Sep 10, 2024
1 parent 56d6a27 commit 8c3ca9c
Show file tree
Hide file tree
Showing 10 changed files with 49 additions and 17 deletions.
6 changes: 3 additions & 3 deletions python/lsst/daf/butler/_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def query_info(
include_parents: bool = False,
include_summary: bool = False,
include_doc: bool = False,
summary_datasets: Iterable[DatasetType] | None = None,
summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Sequence[CollectionInfo]:
"""Query the butler for collections matching an expression and
return detailed information about those collections.
Expand All @@ -310,8 +310,8 @@ def query_info(
include_doc : `bool`, optional
Whether the returned information includes collection documentation
string.
summary_datasets : `~collections.abc.Iterable` [ `DatasetType` ], \
optional
summary_datasets : `~collections.abc.Iterable` [ `DatasetType` ] or \
`~collections.abc.Iterable` [ `str` ], optional
Dataset types to include in returned summaries. Only used if
``include_summary`` is `True`. If not specified then all dataset
types will be included.
Expand Down
18 changes: 18 additions & 0 deletions python/lsst/daf/butler/_dataset_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -796,3 +796,21 @@ def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetT
arguments as well as positional arguments.
"""
return factory(*args, **kwargs)


def get_dataset_type_name(datasetTypeOrName: DatasetType | str) -> str:
    """Given a `DatasetType` object or a dataset type name, return a dataset
    type name.

    Parameters
    ----------
    datasetTypeOrName : `DatasetType` | `str`
        A DatasetType, or the name of a DatasetType. This union is a common
        parameter in many `Butler` methods.

    Returns
    -------
    name : `str`
        The ``name`` attribute of the given `DatasetType`, or the given `str`
        unchanged.

    Raises
    ------
    TypeError
        Raised if the argument is neither a `DatasetType` nor a `str`.
    """
    if isinstance(datasetTypeOrName, DatasetType):
        return datasetTypeOrName.name
    elif isinstance(datasetTypeOrName, str):
        return datasetTypeOrName
    else:
        # Runtime guard: callers frequently pass this union through untyped
        # plumbing, so fail loudly instead of propagating a bad value.
        raise TypeError(f"Expected DatasetType or str, got unexpected object: {datasetTypeOrName}")

Check warning on line 816 in python/lsst/daf/butler/_dataset_type.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/_dataset_type.py#L816

Added line #L816 was not covered by tests
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def query_info(
include_parents: bool = False,
include_summary: bool = False,
include_doc: bool = False,
summary_datasets: Iterable[DatasetType] | None = None,
summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Sequence[CollectionInfo]:
info = []
with self._registry.caching_context():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import sqlalchemy

from ...._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, DatasetType
from ...._dataset_type import get_dataset_type_name
from ...._exceptions_legacy import DatasetTypeError
from ....dimensions import DimensionUniverse
from ..._collection_summary import CollectionSummary
Expand Down Expand Up @@ -511,12 +512,14 @@ def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummar
return summaries[collection.key]

def fetch_summaries(
    self,
    collections: Iterable[CollectionRecord],
    dataset_types: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Mapping[Any, CollectionSummary]:
    # Docstring inherited from DatasetRecordStorageManager.
    #
    # Normalize the heterogeneous DatasetType/str iterable to a set of
    # plain names before delegating; ``None`` means "all dataset types"
    # and is passed through unchanged.
    dataset_type_names: Iterable[str] | None = None
    if dataset_types is not None:
        dataset_type_names = {get_dataset_type_name(dt) for dt in dataset_types}
    return self._summaries.fetch_summaries(collections, dataset_type_names, self._dataset_type_from_row)

_versions: list[VersionTuple]
Expand Down
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/registry/interfaces/_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,9 @@ def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummar

@abstractmethod
def fetch_summaries(
self, collections: Iterable[CollectionRecord], dataset_types: Iterable[DatasetType] | None = None
self,
collections: Iterable[CollectionRecord],
dataset_types: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Mapping[Any, CollectionSummary]:
"""Fetch collection summaries given their names and dataset types.
Expand Down
9 changes: 2 additions & 7 deletions python/lsst/daf/butler/remote_butler/_ref_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from pydantic import TypeAdapter

from .._dataset_ref import DatasetRef
from .._dataset_type import DatasetType
from .._dataset_type import DatasetType, get_dataset_type_name
from .._storage_class import StorageClass
from ..dimensions import DataCoordinate, DataId, DataIdValue, SerializedDataId
from .server_models import DatasetTypeName
Expand Down Expand Up @@ -85,12 +85,7 @@ def normalize_dataset_type_name(datasetTypeOrName: DatasetType | str) -> Dataset
A DatasetType, or the name of a DatasetType. This union is a common
parameter in many `Butler` methods.
"""
if isinstance(datasetTypeOrName, DatasetType):
return DatasetTypeName(datasetTypeOrName.name)
elif isinstance(datasetTypeOrName, str):
return DatasetTypeName(datasetTypeOrName)
else:
raise TypeError(f"Got unexpected object for DatasetType: {datasetTypeOrName}")
return DatasetTypeName(get_dataset_type_name(datasetTypeOrName))


def simplify_dataId(dataId: DataId | None, kwargs: dict[str, DataIdValue]) -> SerializedDataId:
Expand Down
14 changes: 12 additions & 2 deletions python/lsst/daf/butler/remote_butler/_remote_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from ._collection_args import convert_collection_arg_to_glob_string_list
from ._defaults import DefaultsHolder
from ._http_connection import RemoteButlerHttpConnection, parse_model
from ._ref_utils import normalize_dataset_type_name
from .server_models import QueryCollectionInfoRequestModel, QueryCollectionInfoResponseModel


Expand Down Expand Up @@ -87,7 +88,7 @@ def query_info(
include_parents: bool = False,
include_summary: bool = False,
include_doc: bool = False,
summary_datasets: Iterable[DatasetType] | None = None,
summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Sequence[CollectionInfo]:
if collection_types is None:
types = list(CollectionType.all())
Expand All @@ -97,13 +98,20 @@ def query_info(
if include_chains is None:
include_chains = not flatten_chains

if summary_datasets is None:
dataset_types = None
else:
dataset_types = [normalize_dataset_type_name(t) for t in summary_datasets]

request = QueryCollectionInfoRequestModel(
expression=convert_collection_arg_to_glob_string_list(expression),
collection_types=types,
flatten_chains=flatten_chains,
include_chains=include_chains,
include_parents=include_parents,
include_summary=include_summary,
include_doc=include_doc,
summary_datasets=dataset_types,
)
response = self._connection.post("query_collection_info", request)
model = parse_model(response, QueryCollectionInfoResponseModel)
Expand All @@ -115,7 +123,9 @@ def get_info(
) -> CollectionInfo:
if has_globs(name):
raise ValueError("Search expressions are not allowed in 'name' parameter to get_info")

Check warning on line 125 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L125

Added line #L125 was not covered by tests
results = self.query_info(name, include_parents=include_parents, include_summary=include_summary)
results = self.query_info(
name, include_parents=include_parents, include_summary=include_summary, include_doc=True
)
assert len(results) == 1, "Only one result should be returned for get_info."
return results[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ def query_collection_info(
include_chains=request.include_chains,
include_parents=request.include_parents,
include_summary=request.include_summary,
include_doc=request.include_doc,
summary_datasets=request.summary_datasets,
)
return QueryCollectionInfoResponseModel(collections=list(collections))

Expand Down
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/remote_butler/server_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ class QueryCollectionInfoRequestModel(pydantic.BaseModel):
include_chains: bool
include_parents: bool
include_summary: bool
include_doc: bool
summary_datasets: list[DatasetTypeName] | None


class QueryCollectionInfoResponseModel(pydantic.BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/tests/hybrid_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def query_info(
include_parents: bool = False,
include_summary: bool = False,
include_doc: bool = False,
summary_datasets: Iterable[DatasetType] | None = None,
summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Sequence[CollectionInfo]:
return self._hybrid._remote_butler.collections.query_info(
expression,
Expand Down

0 comments on commit 8c3ca9c

Please sign in to comment.