From 8c3ca9c8ec1c5c240a6b7adb2fc6fc5c303a2367 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Tue, 10 Sep 2024 10:47:15 -0700 Subject: [PATCH] Handle collection query optimization parameters Update RemoteButlerCollections to pass the new optimization parameters added in DM-45993 to the server. --- python/lsst/daf/butler/_butler_collections.py | 6 +++--- python/lsst/daf/butler/_dataset_type.py | 18 ++++++++++++++++++ .../_direct_butler_collections.py | 2 +- .../registry/datasets/byDimensions/_manager.py | 7 +++++-- .../butler/registry/interfaces/_datasets.py | 4 +++- .../daf/butler/remote_butler/_ref_utils.py | 9 ++------- .../_remote_butler_collections.py | 14 ++++++++++++-- .../remote_butler/server/handlers/_external.py | 2 ++ .../daf/butler/remote_butler/server_models.py | 2 ++ .../butler/tests/hybrid_butler_collections.py | 2 +- 10 files changed, 49 insertions(+), 17 deletions(-) diff --git a/python/lsst/daf/butler/_butler_collections.py b/python/lsst/daf/butler/_butler_collections.py index 6d4bae4984..504dcc12c3 100644 --- a/python/lsst/daf/butler/_butler_collections.py +++ b/python/lsst/daf/butler/_butler_collections.py @@ -283,7 +283,7 @@ def query_info( include_parents: bool = False, include_summary: bool = False, include_doc: bool = False, - summary_datasets: Iterable[DatasetType] | None = None, + summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None, ) -> Sequence[CollectionInfo]: """Query the butler for collections matching an expression and return detailed information about those collections. @@ -310,8 +310,8 @@ def query_info( include_doc : `bool`, optional Whether the returned information includes collection documentation string. - summary_datasets : `~collections.abc.Iterable` [ `DatasetType` ], \ - optional + summary_datasets : `~collections.abc.Iterable` [ `DatasetType` ] or \ + `~collections.abc.Iterable` [ `str` ], optional Dataset types to include in returned summaries. Only used if ``include_summary`` is `True`. If not specified then all dataset types will be included. diff --git a/python/lsst/daf/butler/_dataset_type.py b/python/lsst/daf/butler/_dataset_type.py index 9321ea67c6..47fdacd49d 100644 --- a/python/lsst/daf/butler/_dataset_type.py +++ b/python/lsst/daf/butler/_dataset_type.py @@ -796,3 +796,21 @@ def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetT arguments as well as positional arguments. """ return factory(*args, **kwargs) + + +def get_dataset_type_name(datasetTypeOrName: DatasetType | str) -> str: + """Given a `DatasetType` object or a dataset type name, return a dataset + type name. + + Parameters + ---------- + datasetTypeOrName : `DatasetType` | `str` + A DatasetType, or the name of a DatasetType. This union is a common + parameter in many `Butler` methods. + """ + if isinstance(datasetTypeOrName, DatasetType): + return datasetTypeOrName.name + elif isinstance(datasetTypeOrName, str): + return datasetTypeOrName + else: + raise TypeError(f"Expected DatasetType or str, got unexpected object: {datasetTypeOrName}") diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py index 1ce6fb63b9..a84cebecde 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler_collections.py @@ -114,7 +114,7 @@ def query_info( include_parents: bool = False, include_summary: bool = False, include_doc: bool = False, - summary_datasets: Iterable[DatasetType] | None = None, + summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None, ) -> Sequence[CollectionInfo]: info = [] with self._registry.caching_context(): diff --git a/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py b/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py index 909761ea19..df0f17a03f 100644 --- a/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py +++ b/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py @@ -12,6 +12,7 @@ import sqlalchemy from ...._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, DatasetType +from ...._dataset_type import get_dataset_type_name from ...._exceptions_legacy import DatasetTypeError from ....dimensions import DimensionUniverse from ..._collection_summary import CollectionSummary @@ -511,12 +512,14 @@ def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummar return summaries[collection.key] def fetch_summaries( - self, collections: Iterable[CollectionRecord], dataset_types: Iterable[DatasetType] | None = None + self, + collections: Iterable[CollectionRecord], + dataset_types: Iterable[DatasetType] | Iterable[str] | None = None, ) -> Mapping[Any, CollectionSummary]: # Docstring inherited from DatasetRecordStorageManager. dataset_type_names: Iterable[str] | None = None if dataset_types is not None: - dataset_type_names = set(dataset_type.name for dataset_type in dataset_types) + dataset_type_names = set(get_dataset_type_name(dt) for dt in dataset_types) return self._summaries.fetch_summaries(collections, dataset_type_names, self._dataset_type_from_row) _versions: list[VersionTuple] diff --git a/python/lsst/daf/butler/registry/interfaces/_datasets.py b/python/lsst/daf/butler/registry/interfaces/_datasets.py index d86fe25414..be8a22f910 100644 --- a/python/lsst/daf/butler/registry/interfaces/_datasets.py +++ b/python/lsst/daf/butler/registry/interfaces/_datasets.py @@ -674,7 +674,9 @@ def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummar @abstractmethod def fetch_summaries( - self, collections: Iterable[CollectionRecord], dataset_types: Iterable[DatasetType] | None = None + self, + collections: Iterable[CollectionRecord], + dataset_types: Iterable[DatasetType] | Iterable[str] | None = None, ) -> Mapping[Any, CollectionSummary]: """Fetch collection summaries given their names and dataset types. diff --git a/python/lsst/daf/butler/remote_butler/_ref_utils.py b/python/lsst/daf/butler/remote_butler/_ref_utils.py index b5944fa9bb..039805f7b5 100644 --- a/python/lsst/daf/butler/remote_butler/_ref_utils.py +++ b/python/lsst/daf/butler/remote_butler/_ref_utils.py @@ -36,7 +36,7 @@ from pydantic import TypeAdapter from .._dataset_ref import DatasetRef -from .._dataset_type import DatasetType +from .._dataset_type import DatasetType, get_dataset_type_name from .._storage_class import StorageClass from ..dimensions import DataCoordinate, DataId, DataIdValue, SerializedDataId from .server_models import DatasetTypeName @@ -85,12 +85,7 @@ def normalize_dataset_type_name(datasetTypeOrName: DatasetType | str) -> Dataset A DatasetType, or the name of a DatasetType. This union is a common parameter in many `Butler` methods. """ - if isinstance(datasetTypeOrName, DatasetType): - return DatasetTypeName(datasetTypeOrName.name) - elif isinstance(datasetTypeOrName, str): - return DatasetTypeName(datasetTypeOrName) - else: - raise TypeError(f"Got unexpected object for DatasetType: {datasetTypeOrName}") + return DatasetTypeName(get_dataset_type_name(datasetTypeOrName)) def simplify_dataId(dataId: DataId | None, kwargs: dict[str, DataIdValue]) -> SerializedDataId: diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py index 43572b63c0..db3f3ca83e 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler_collections.py @@ -40,6 +40,7 @@ from ._collection_args import convert_collection_arg_to_glob_string_list from ._defaults import DefaultsHolder from ._http_connection import RemoteButlerHttpConnection, parse_model +from ._ref_utils import normalize_dataset_type_name from .server_models import QueryCollectionInfoRequestModel, QueryCollectionInfoResponseModel @@ -87,7 +88,7 @@ def query_info( include_parents: bool = False, include_summary: bool = False, include_doc: bool = False, - summary_datasets: Iterable[DatasetType] | None = None, + summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None, ) -> Sequence[CollectionInfo]: if collection_types is None: types = list(CollectionType.all()) @@ -97,6 +98,11 @@ def query_info( if include_chains is None: include_chains = not flatten_chains + if summary_datasets is None: + dataset_types = None + else: + dataset_types = [normalize_dataset_type_name(t) for t in summary_datasets] + request = QueryCollectionInfoRequestModel( expression=convert_collection_arg_to_glob_string_list(expression), collection_types=types, @@ -104,6 +110,8 @@ def query_info( include_chains=include_chains, include_parents=include_parents, include_summary=include_summary, + include_doc=include_doc, + summary_datasets=dataset_types, ) response = self._connection.post("query_collection_info", request) model = parse_model(response, QueryCollectionInfoResponseModel) @@ -115,7 +123,9 @@ def get_info( ) -> CollectionInfo: if has_globs(name): raise ValueError("Search expressions are not allowed in 'name' parameter to get_info") - results = self.query_info(name, include_parents=include_parents, include_summary=include_summary) + results = self.query_info( + name, include_parents=include_parents, include_summary=include_summary, include_doc=True + ) assert len(results) == 1, "Only one result should be returned for get_info." return results[0] diff --git a/python/lsst/daf/butler/remote_butler/server/handlers/_external.py b/python/lsst/daf/butler/remote_butler/server/handlers/_external.py index 982c008c42..bfa90b2d8a 100644 --- a/python/lsst/daf/butler/remote_butler/server/handlers/_external.py +++ b/python/lsst/daf/butler/remote_butler/server/handlers/_external.py @@ -267,6 +267,8 @@ def query_collection_info( include_chains=request.include_chains, include_parents=request.include_parents, include_summary=request.include_summary, + include_doc=request.include_doc, + summary_datasets=request.summary_datasets, ) return QueryCollectionInfoResponseModel(collections=list(collections)) diff --git a/python/lsst/daf/butler/remote_butler/server_models.py b/python/lsst/daf/butler/remote_butler/server_models.py index 6573d70967..1c688fd213 100644 --- a/python/lsst/daf/butler/remote_butler/server_models.py +++ b/python/lsst/daf/butler/remote_butler/server_models.py @@ -205,6 +205,8 @@ class QueryCollectionInfoRequestModel(pydantic.BaseModel): include_chains: bool include_parents: bool include_summary: bool + include_doc: bool + summary_datasets: list[DatasetTypeName] | None class QueryCollectionInfoResponseModel(pydantic.BaseModel): diff --git a/python/lsst/daf/butler/tests/hybrid_butler_collections.py b/python/lsst/daf/butler/tests/hybrid_butler_collections.py index 2ca5522dc7..c375b27af3 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler_collections.py +++ b/python/lsst/daf/butler/tests/hybrid_butler_collections.py @@ -87,7 +87,7 @@ def query_info( include_parents: bool = False, include_summary: bool = False, include_doc: bool = False, - summary_datasets: Iterable[DatasetType] | None = None, + summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None, ) -> Sequence[CollectionInfo]: return self._hybrid._remote_butler.collections.query_info( expression,