Skip to content

Commit

Permalink
Implement RemoteButler.getURI
Browse files Browse the repository at this point in the history
Moved the DirectButler implementation to the Butler base class, since the code is identical for RemoteButler.
  • Loading branch information
dhirving committed Dec 15, 2023
1 parent fb57ca9 commit cca5c5d
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 91 deletions.
12 changes: 10 additions & 2 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,6 @@ def getURIs(
"""
raise NotImplementedError()

@abstractmethod
def getURI(
self,
datasetRefOrType: DatasetRef | DatasetType | str,
Expand Down Expand Up @@ -808,7 +807,16 @@ def getURI(
Raised if a URI is requested for a dataset that consists of
multiple artifacts.
"""
raise NotImplementedError()
primary, components = self.getURIs(
datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs
)

if primary is None or components:
raise RuntimeError(
f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
"Use Butler.getURIs() instead."
)
return primary

@abstractmethod
def get_dataset_type(self, name: str) -> DatasetType:
Expand Down
72 changes: 0 additions & 72 deletions python/lsst/daf/butler/direct_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1259,78 +1259,6 @@ def getURIs(
)
return self._datastore.getURIs(ref, predict)

def getURI(
    self,
    datasetRefOrType: DatasetRef | DatasetType | str,
    /,
    dataId: DataId | None = None,
    *,
    predict: bool = False,
    collections: Any = None,
    run: str | None = None,
    **kwargs: Any,
) -> ResourcePath:
    """Return the single URI that locates a dataset.

    This is a convenience wrapper around `getURIs` for datasets that are
    stored as exactly one artifact.

    Parameters
    ----------
    datasetRefOrType : `DatasetRef`, `DatasetType`, or `str`
        Either a `DatasetRef` (in which case ``dataId`` should be `None`),
        or a `DatasetType` or the name of one.
    dataId : `dict` or `DataCoordinate`, optional
        `Dimension` link name, value pairs labelling the `DatasetRef`
        within a collection. Leave as `None` when a `DatasetRef` is given
        as the first argument.
    predict : `bool`, optional
        If `True`, a URI may be returned for a dataset that has not been
        written yet.
    collections : Any, optional
        Collections to search instead of ``self.collections``. Accepts
        anything supported by the ``collections`` argument to butler
        construction.
    run : `str`, optional
        Run to use for predictions instead of ``self.run``.
    **kwargs
        Extra keyword arguments used to augment or construct a
        `DataCoordinate`. See `DataCoordinate.standardize` parameters.

    Returns
    -------
    uri : `lsst.resources.ResourcePath`
        URI of the dataset within the datastore. When the dataset is not
        in the datastore and ``predict`` is `True`, the URI is a
        prediction and carries the fragment "#predicted". For datastores
        whose entities do not map well onto URIs the value is merely
        descriptive, and it is not guaranteed to be obtainable.

    Raises
    ------
    LookupError
        Raised if the dataset does not exist and guessing is not allowed.
    ValueError
        Raised if a resolved `DatasetRef` was passed in but differs from
        the one found in the registry.
    TypeError
        Raised if no collections were provided.
    RuntimeError
        Raised if the dataset consists of multiple artifacts and so has
        no single URI.
    """
    # All lookup and validation is delegated to getURIs(); this method
    # only narrows its result down to a single URI.
    single_uri, per_component = self.getURIs(
        datasetRefOrType,
        dataId=dataId,
        predict=predict,
        collections=collections,
        run=run,
        **kwargs,
    )

    # A usable answer needs a primary URI and no per-component URIs.
    if single_uri is not None and not per_component:
        return single_uri

    raise RuntimeError(
        f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
        "Use Butler.getURIs() instead."
    )

def get_dataset_type(self, name: str) -> DatasetType:
    # Pure delegation: the lookup by name is answered entirely by the
    # registry object held in ``self._registry``.
    registry = self._registry
    dataset_type = registry.getDatasetType(name)
    return dataset_type

Expand Down
14 changes: 0 additions & 14 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,20 +310,6 @@ def getURIs(
components[component] = ResourcePath(str(f.url))
return DatasetRefURIs(componentURIs=components)

def getURI(
    self,
    datasetRefOrType: DatasetRef | DatasetType | str,
    /,
    dataId: DataId | None = None,
    *,
    predict: bool = False,
    collections: Any = None,
    run: str | None = None,
    **kwargs: Any,
) -> ResourcePath:
    # Docstring inherited.
    #
    # Implemented on top of this class's own getURIs() rather than left
    # as an unconditional NotImplementedError: a single-URI lookup is just
    # a getURIs() call whose result has a primary URI and no component
    # URIs.
    primary, components = self.getURIs(
        datasetRefOrType, dataId=dataId, predict=predict, collections=collections, run=run, **kwargs
    )

    # Refuse to pick one URI when there is no primary URI or when the
    # dataset is split into per-component artifacts.
    if primary is None or components:
        raise RuntimeError(
            f"Dataset ({datasetRefOrType}) includes distinct URIs for components. "
            "Use Butler.getURIs() instead."
        )
    return primary

def get_dataset_type(self, name: str) -> DatasetType:
# In future implementation this should directly access the cache
# and only go to the server if the dataset type is not known.
Expand Down
16 changes: 13 additions & 3 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from lsst.daf.butler.datastore import DatasetRefURIs
from lsst.daf.butler.tests import DatastoreMock, addDatasetType
from lsst.daf.butler.tests.utils import MetricsExample, MetricTestRepo, makeTestTempDir, removeTestTempDir
from lsst.resources import ResourcePath
from lsst.resources.http import HttpResourcePath

TESTDIR = os.path.abspath(os.path.dirname(__file__))
Expand Down Expand Up @@ -329,11 +330,16 @@ def check_sc_override(converted):

def test_getURIs_no_components(self):
# This dataset does not have components, and should return one URI.
def check_uri(uri: ResourcePath):
self.assertIsNotNone(uris.primaryURI)
self.assertEqual(uris.primaryURI.scheme, "https")
self.assertEqual(uris.primaryURI.read(), b"123")

uris = self.butler.getURIs(self.simple_dataset_ref)
self.assertEqual(len(uris.componentURIs), 0)
self.assertIsNotNone(uris.primaryURI)
self.assertEqual(uris.primaryURI.scheme, "https")
self.assertEqual(uris.primaryURI.read(), b"123")
check_uri(uris.primaryURI)

check_uri(self.butler.getURI(self.simple_dataset_ref))

def test_getURIs_multiple_components(self):
# This dataset has multiple components, so we should get back multiple
Expand All @@ -353,6 +359,10 @@ def check_uris(uris: DatasetRefURIs):
uris = self.butler.getURIs(dataset_type, dataId=data_id, collections=collections)
check_uris(uris)

# Calling getURI on a multi-file dataset raises an exception
with self.assertRaises(RuntimeError):
self.butler.getURI(dataset_type, dataId=data_id, collections=collections)

# getURIs does NOT respect component overrides on the DatasetRef,
# instead returning the parent's URIs. Unclear if this is "correct"
# from a conceptual point of view, but this matches DirectButler
Expand Down

0 comments on commit cca5c5d

Please sign in to comment.