Skip to content

Commit

Permalink
Handle collection query optimization parameters
Browse files Browse the repository at this point in the history
Update RemoteButlerCollections to pass the new optimization parameters added in DM-45993 to the server.
  • Loading branch information
dhirving committed Sep 10, 2024
1 parent 56d6a27 commit 8c3ca9c
Show file tree
Hide file tree
Showing 10 changed files with 49 additions and 17 deletions.
6 changes: 3 additions & 3 deletions python/lsst/daf/butler/_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def query_info(
include_parents: bool = False,
include_summary: bool = False,
include_doc: bool = False,
summary_datasets: Iterable[DatasetType] | None = None,
summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Sequence[CollectionInfo]:
"""Query the butler for collections matching an expression and
return detailed information about those collections.
Expand All @@ -310,8 +310,8 @@ def query_info(
include_doc : `bool`, optional
Whether the returned information includes collection documentation
string.
summary_datasets : `~collections.abc.Iterable` [ `DatasetType` ], \
optional
summary_datasets : `~collections.abc.Iterable` [ `DatasetType` ] or \
`~collections.abc.Iterable` [ `str` ], optional
Dataset types to include in returned summaries. Only used if
``include_summary`` is `True`. If not specified then all dataset
types will be included.
Expand Down
18 changes: 18 additions & 0 deletions python/lsst/daf/butler/_dataset_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -796,3 +796,21 @@ def _unpickle_via_factory(factory: Callable, args: Any, kwargs: Any) -> DatasetT
arguments as well as positional arguments.
"""
return factory(*args, **kwargs)


def get_dataset_type_name(datasetTypeOrName: DatasetType | str) -> str:
    """Given a `DatasetType` object or a dataset type name, return a dataset
    type name.

    Parameters
    ----------
    datasetTypeOrName : `DatasetType` | `str`
        A DatasetType, or the name of a DatasetType. This union is a common
        parameter in many `Butler` methods.

    Returns
    -------
    name : `str`
        The ``name`` attribute of the given `DatasetType`, or the given `str`
        unchanged.

    Raises
    ------
    TypeError
        Raised if the argument is neither a `DatasetType` nor a `str`.
    """
    if isinstance(datasetTypeOrName, DatasetType):
        return datasetTypeOrName.name
    elif isinstance(datasetTypeOrName, str):
        return datasetTypeOrName
    else:
        # Runtime guard: callers frequently pass this union through untyped
        # plumbing, so fail loudly instead of propagating a bad value.
        raise TypeError(f"Expected DatasetType or str, got unexpected object: {datasetTypeOrName}")

Check warning on line 816 in python/lsst/daf/butler/_dataset_type.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/_dataset_type.py#L816

Added line #L816 was not covered by tests
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def query_info(
include_parents: bool = False,
include_summary: bool = False,
include_doc: bool = False,
summary_datasets: Iterable[DatasetType] | None = None,
summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Sequence[CollectionInfo]:
info = []
with self._registry.caching_context():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import sqlalchemy

from ...._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef, DatasetType
from ...._dataset_type import get_dataset_type_name
from ...._exceptions_legacy import DatasetTypeError
from ....dimensions import DimensionUniverse
from ..._collection_summary import CollectionSummary
Expand Down Expand Up @@ -511,12 +512,14 @@ def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummar
return summaries[collection.key]

def fetch_summaries(
    self,
    collections: Iterable[CollectionRecord],
    dataset_types: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Mapping[Any, CollectionSummary]:
    # Docstring inherited from DatasetRecordStorageManager.
    #
    # Normalize the heterogeneous DatasetType/str iterable to a set of
    # plain names before delegating; ``None`` means "all dataset types"
    # and is passed through unchanged.
    dataset_type_names: Iterable[str] | None = None
    if dataset_types is not None:
        dataset_type_names = {get_dataset_type_name(dt) for dt in dataset_types}
    return self._summaries.fetch_summaries(collections, dataset_type_names, self._dataset_type_from_row)

_versions: list[VersionTuple]
Expand Down
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/registry/interfaces/_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,9 @@ def getCollectionSummary(self, collection: CollectionRecord) -> CollectionSummar

@abstractmethod
def fetch_summaries(
self, collections: Iterable[CollectionRecord], dataset_types: Iterable[DatasetType] | None = None
self,
collections: Iterable[CollectionRecord],
dataset_types: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Mapping[Any, CollectionSummary]:
"""Fetch collection summaries given their names and dataset types.
Expand Down
9 changes: 2 additions & 7 deletions python/lsst/daf/butler/remote_butler/_ref_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from pydantic import TypeAdapter

from .._dataset_ref import DatasetRef
from .._dataset_type import DatasetType
from .._dataset_type import DatasetType, get_dataset_type_name
from .._storage_class import StorageClass
from ..dimensions import DataCoordinate, DataId, DataIdValue, SerializedDataId
from .server_models import DatasetTypeName
Expand Down Expand Up @@ -85,12 +85,7 @@ def normalize_dataset_type_name(datasetTypeOrName: DatasetType | str) -> Dataset
A DatasetType, or the name of a DatasetType. This union is a common
parameter in many `Butler` methods.
"""
if isinstance(datasetTypeOrName, DatasetType):
return DatasetTypeName(datasetTypeOrName.name)
elif isinstance(datasetTypeOrName, str):
return DatasetTypeName(datasetTypeOrName)
else:
raise TypeError(f"Got unexpected object for DatasetType: {datasetTypeOrName}")
return DatasetTypeName(get_dataset_type_name(datasetTypeOrName))


def simplify_dataId(dataId: DataId | None, kwargs: dict[str, DataIdValue]) -> SerializedDataId:
Expand Down
14 changes: 12 additions & 2 deletions python/lsst/daf/butler/remote_butler/_remote_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from ._collection_args import convert_collection_arg_to_glob_string_list
from ._defaults import DefaultsHolder
from ._http_connection import RemoteButlerHttpConnection, parse_model
from ._ref_utils import normalize_dataset_type_name
from .server_models import QueryCollectionInfoRequestModel, QueryCollectionInfoResponseModel


Expand Down Expand Up @@ -87,7 +88,7 @@ def query_info(
include_parents: bool = False,
include_summary: bool = False,
include_doc: bool = False,
summary_datasets: Iterable[DatasetType] | None = None,
summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Sequence[CollectionInfo]:
if collection_types is None:
types = list(CollectionType.all())
Expand All @@ -97,13 +98,20 @@ def query_info(
if include_chains is None:
include_chains = not flatten_chains

if summary_datasets is None:
dataset_types = None
else:
dataset_types = [normalize_dataset_type_name(t) for t in summary_datasets]

request = QueryCollectionInfoRequestModel(
expression=convert_collection_arg_to_glob_string_list(expression),
collection_types=types,
flatten_chains=flatten_chains,
include_chains=include_chains,
include_parents=include_parents,
include_summary=include_summary,
include_doc=include_doc,
summary_datasets=dataset_types,
)
response = self._connection.post("query_collection_info", request)
model = parse_model(response, QueryCollectionInfoResponseModel)
Expand All @@ -115,7 +123,9 @@ def get_info(
) -> CollectionInfo:
if has_globs(name):
raise ValueError("Search expressions are not allowed in 'name' parameter to get_info")

Check warning on line 125 in python/lsst/daf/butler/remote_butler/_remote_butler_collections.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler_collections.py#L125

Added line #L125 was not covered by tests
results = self.query_info(name, include_parents=include_parents, include_summary=include_summary)
results = self.query_info(
name, include_parents=include_parents, include_summary=include_summary, include_doc=True
)
assert len(results) == 1, "Only one result should be returned for get_info."
return results[0]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ def query_collection_info(
include_chains=request.include_chains,
include_parents=request.include_parents,
include_summary=request.include_summary,
include_doc=request.include_doc,
summary_datasets=request.summary_datasets,
)
return QueryCollectionInfoResponseModel(collections=list(collections))

Expand Down
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/remote_butler/server_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ class QueryCollectionInfoRequestModel(pydantic.BaseModel):
include_chains: bool
include_parents: bool
include_summary: bool
include_doc: bool
summary_datasets: list[DatasetTypeName] | None


class QueryCollectionInfoResponseModel(pydantic.BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/tests/hybrid_butler_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def query_info(
include_parents: bool = False,
include_summary: bool = False,
include_doc: bool = False,
summary_datasets: Iterable[DatasetType] | None = None,
summary_datasets: Iterable[DatasetType] | Iterable[str] | None = None,
) -> Sequence[CollectionInfo]:
return self._hybrid._remote_butler.collections.query_info(
expression,
Expand Down

0 comments on commit 8c3ca9c

Please sign in to comment.