Skip to content

Commit

Permalink
WIP: RemoteButler.find_dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
timj committed Oct 26, 2023
1 parent c856cef commit 1020e39
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 4 deletions.
35 changes: 33 additions & 2 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@
from .._storage_class import StorageClass
from .._timespan import Timespan
from ..datastore import DatasetRefURIs
from ..dimensions import DataId, DimensionConfig, DimensionUniverse
from ..registry import CollectionArgType, Registry, RegistryDefaults
from ..dimensions import DataCoordinate, DataId, DimensionConfig, DimensionUniverse, SerializedDataCoordinate
from ..registry import CollectionArgType, NoDefaultCollectionError, Registry, RegistryDefaults
from ..transfers import RepoExportContext
from ._config import RemoteButlerConfigModel

Expand Down Expand Up @@ -101,6 +101,26 @@ def dimensions(self) -> DimensionUniverse:
self._dimensions = DimensionUniverse(config)
return self._dimensions

def _simplify_dataId(self, dataId: DataId | None) -> SerializedDataCoordinate | None:
    """Convert a generic data ID into a serializable model.

    Parameters
    ----------
    dataId : `dict`, `None`, `DataCoordinate`
        The data ID to serialize.

    Returns
    -------
    data_id : `SerializedDataCoordinate` or `None`
        The serializable form, or `None` when no data ID was supplied.
    """
    # Nothing to serialize.
    if dataId is None:
        return None

    if not isinstance(dataId, DataCoordinate):
        # Anything that is not a DataCoordinate is assumed to be usable
        # as a dict and is wrapped directly.
        return SerializedDataCoordinate(dataId=dataId)

    # A DataCoordinate already knows how to produce its simple form.
    return dataId.to_simple()

Check warning on line 122 in python/lsst/daf/butler/remote_butler/_remote_butler.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler.py#L122

Added line #L122 was not covered by tests

def getDatasetType(self, name: str) -> DatasetType:
    # Docstring inherited.
    # This implementation performs no lookup; every call raises.
    raise NotImplementedError()
Expand Down Expand Up @@ -196,6 +216,17 @@ def find_dataset(
datastore_records: bool = False,
**kwargs: Any,
) -> DatasetRef | None:
if collections is None:

Check warning on line 219 in python/lsst/daf/butler/remote_butler/_remote_butler.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler.py#L219

Added line #L219 was not covered by tests
if not self.collections:
raise NoDefaultCollectionError(

Check warning on line 221 in python/lsst/daf/butler/remote_butler/_remote_butler.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler.py#L221

Added line #L221 was not covered by tests
"No collections provided to find_dataset, and no defaults from butler construction."
)
collections = self.collections

Check warning on line 224 in python/lsst/daf/butler/remote_butler/_remote_butler.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler.py#L224

Added line #L224 was not covered by tests
# Temporary hack: assume the collections are plain strings. In the future
# we want to construct a CollectionWildcard and filter it through the
# collection cache to generate the list of collection names.
# collection_strings = [str(c) for c in collections]

raise NotImplementedError()

def retrieveArtifacts(
Expand Down
53 changes: 51 additions & 2 deletions python/lsst/daf/butler/remote_butler/server/_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,15 @@
from functools import cache
from typing import Any

from fastapi import Depends, FastAPI
from fastapi import Depends, FastAPI, Query
from fastapi.middleware.gzip import GZipMiddleware
from lsst.daf.butler import Butler, SerializedDatasetType
from lsst.daf.butler import (
Butler,
DataCoordinate,
SerializedDataCoordinate,
SerializedDatasetRef,
SerializedDatasetType,
)

from ._factory import Factory

Expand All @@ -56,6 +62,26 @@ def factory_dependency() -> Factory:
return Factory(butler=_make_global_butler())


def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None:
    """Rebuild a `DataCoordinate` from its serialized form.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler whose registry is passed to `DataCoordinate.from_simple`.
    data_id : `SerializedDataCoordinate` or `None`
        The serialized form.

    Returns
    -------
    dataId : `DataCoordinate` or `None`
        The reconstructed data ID, or `None` when ``data_id`` is `None`.
    """
    if data_id is not None:
        return DataCoordinate.from_simple(data_id, registry=butler.registry)
    # No data ID was sent, so there is nothing to unpack.
    return None

Check warning on line 82 in python/lsst/daf/butler/remote_butler/server/_server.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/server/_server.py#L81-L82

Added lines #L81 - L82 were not covered by tests


@app.get("/butler/v1/universe", response_model=dict[str, Any])
def get_dimension_universe(factory: Factory = Depends(factory_dependency)) -> dict[str, Any]:
"""Allow remote client to get dimensions definition."""
Expand All @@ -78,3 +104,26 @@ def get_dataset_type(
butler = factory.create_butler()
datasetType = butler.get_dataset_type(dataset_type_name)
return datasetType.to_simple()


# Not yet supported: Timespan is not yet a pydantic model.
# The collections parameter assumes the client side has already resolved any regexes.
@app.post(
    "/butler/v1/find_dataset/{datasetType}",
    summary="Retrieve this dataset definition from collection, dataset type, and dataId",
    response_model=SerializedDatasetRef,
    response_model_exclude_unset=True,
    response_model_exclude_defaults=True,
    response_model_exclude_none=True,
)
def find_dataset(
    datasetType: str,
    dataId: SerializedDataCoordinate | None = None,
    collections: list[str] | None = Query(None),
    factory: Factory = Depends(factory_dependency),
) -> SerializedDatasetRef | None:
    # Comments are used here instead of a docstring so the generated
    # OpenAPI description of this endpoint is left unchanged.
    butler = factory.create_butler()

    # An empty or missing collection list is forwarded as None, and the
    # serialized data ID is converted back to a DataCoordinate first.
    ref = butler.find_dataset(
        datasetType,
        dataId=unpack_dataId(butler, dataId),
        collections=collections or None,
    )

    # No match: return None instead of a serialized ref.
    if not ref:
        return None
    return ref.to_simple()

Check warning on line 129 in python/lsst/daf/butler/remote_butler/server/_server.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/server/_server.py#L127-L129

Added lines #L127 - L129 were not covered by tests

0 comments on commit 1020e39

Please sign in to comment.