
Commit

Add documentation
dhirving committed Nov 29, 2023
1 parent 4c72aa3 commit 9097407
Showing 3 changed files with 87 additions and 6 deletions.
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/datastores/fileDatastore.py
@@ -2000,7 +2000,9 @@ def prepare_get_for_external_client(self, ref: DatasetRef) -> FileDatastoreGetPayload:

    def to_file_info_payload(info: DatasetLocationInformation) -> FileDatastoreGetPayloadFileInfo:
        location, file_info = info
-       return FileDatastoreGetPayloadFileInfo(url=location.uri.geturl(), metadata=file_info.to_simple())
+       return FileDatastoreGetPayloadFileInfo(
+           url=location.uri.geturl(), datastoreRecords=file_info.to_simple()
+       )

    return FileDatastoreGetPayload(
        datastore_type="file",
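
For reference, a hedged sketch of the JSON-equivalent form of one file entry after this rename. Only the two field names come from the model; the URL and the nested record contents are made up for illustration:

# Illustrative serialized shape of a single FileDatastoreGetPayloadFileInfo.
example_entry = {
    "url": "https://example.org/datastore/some_dataset.fits",  # made-up URL
    "datastoreRecords": {},  # nested SerializedStoredFileInfo fields omitted
}
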
47 changes: 42 additions & 5 deletions python/lsst/daf/butler/datastores/fileDatastoreClient.py
@@ -15,34 +15,71 @@


class FileDatastoreGetPayloadFileInfo(_BaseModelCompat):
    """Information required to read a single file stored in `FileDatastore`."""

    # TODO DM-41879: Allowing arbitrary URLs here is a severe security issue,
    # since it allows the server to trick the client into fetching data from
    # any file on its local filesystem or from remote storage using
    # credentials lying around in the environment. This should be restricted
    # to HTTP only, but we don't yet have a means of mocking out HTTP gets in
    # tests.
    url: str
    """An absolute URL that can be used to read the file."""

-   metadata: SerializedStoredFileInfo
+   datastoreRecords: SerializedStoredFileInfo
    """`FileDatastore` metadata records for this file."""


class FileDatastoreGetPayload(_BaseModelCompat):
    """A serializable representation of the data needed for retrieving an
    artifact and converting it to a Python object.
    """

    datastore_type: Literal["file"]

    file_info: list[FileDatastoreGetPayloadFileInfo]
    """List of retrieval information for each file associated with this
    artifact.
    """

    dataset_ref: SerializedDatasetRef
    """Registry information associated with this artifact."""


def get_dataset_as_python_object(
-   model: FileDatastoreGetPayload,
+   payload: FileDatastoreGetPayload,
    *,
    universe: DimensionUniverse,
    parameters: Mapping[str, Any] | None,
    storageClass: StorageClass | str | None,
) -> Any:
"""Retrieve an artifact from storage and return it as a Python object
Parameters
----------
payload : `FileDatastoreGetPayload`
Pre-processed information about each file associated with this artifact
universe: `DimensionUniverse`
The universe of dimensions associated with the `DatasetRef` contained
in `payload`.
parameters : `Mapping`[`str`, `Any`]
`StorageClass` and `Formatter` parameters to be used when converting
the artifact to a Python object
storageClass: `StorageClass` | `str` | `None`
Overrides the `StorageClass` to be used when converting the artifact to
a Python object. If `None`, uses the `StorageClass` specified by
`payload`.
Returns
-------
python_object: `Any`
The retrieved artifact, converted to a Python object
"""
    fileLocations: list[DatasetLocationInformation] = [
-       (Location(None, file_info.url), StoredFileInfo.from_simple(file_info.metadata))
-       for file_info in model.file_info
+       (Location(None, file_info.url), StoredFileInfo.from_simple(file_info.datastoreRecords))
+       for file_info in payload.file_info
    ]

-   ref = DatasetRef.from_simple(model.dataset_ref, universe=universe)
+   ref = DatasetRef.from_simple(payload.dataset_ref, universe=universe)
    if storageClass is not None:
        ref = ref.overrideStorageClass(storageClass)

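Client-side usage might look like the following sketch, assuming `payload` was received from a Butler server and that a default `DimensionUniverse` matches the server's dimension configuration:

from lsst.daf.butler import DimensionUniverse

# Convert the server-provided payload back into a Python object.
universe = DimensionUniverse()  # assumed to match the server's universe
python_object = get_dataset_as_python_object(
    payload,
    universe=universe,
    parameters=None,    # no StorageClass/Formatter parameters
    storageClass=None,  # keep the StorageClass recorded in the payload
)
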
42 changes: 42 additions & 0 deletions python/lsst/daf/butler/datastores/file_datastore/get.py
@@ -84,6 +84,27 @@ def generate_datastore_get_information(
    parameters: Mapping[str, Any] | None,
    readStorageClass: StorageClass | None = None,
) -> list[DatastoreFileGetInformation]:
"""Process parameters and instantiate formatters for in preparation for
retrieving an artifact and converting it to a Python object
Parameters
----------
fileLocations : `list`[`DatasetLocationInformation`]
List of file locations for this artifact and their associated datastore
records
ref : `DatasetRef`
The registry information associated with this artifact.
parameters : `Mapping`[`str`, `Any`]
`StorageClass` and `Formatter` parameters
readStorageClass : `StorageClass` | `None`, optional
The StorageClass to use when ultimately returning the resulting object
from the get. Defaults to the `StorageClass` specified by `ref`
Returns
-------
getInfo : `list[DatastoreFileGetInformation]`
The parameters needed to retrieve each file
"""
if readStorageClass is None:
readStorageClass = ref.datasetType.storageClass
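
A hedged sketch of calling this helper directly; the diff does not show whether these arguments are keyword-only, so keywords are assumed, and `fileLocations` and `ref` are placeholders for existing objects:

# Prepare per-file read information; readStorageClass is left at its
# default, so the StorageClass carried by `ref` is used.
allGetInfo = generate_datastore_get_information(
    fileLocations,  # list[DatasetLocationInformation]
    ref=ref,        # DatasetRef for the artifact
    parameters=None,
)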

@@ -154,6 +175,8 @@ def _read_artifact_into_memory(
        The registry information associated with this artifact.
    isComponent : `bool`
        Flag to indicate if a component is being read from this artifact.
    cache_manager : `AbstractDatastoreCacheManager`
        The cache manager to use for caching retrieved files.
    cache_ref : `DatasetRef`, optional
        The `DatasetRef` to use when looking up the file in the cache.
        This ref must have the same ID as the supplied ref but can
@@ -316,6 +339,25 @@ def get_dataset_as_python_object_from_get_info(
    parameters: Mapping[str, Any] | None,
    cache_manager: AbstractDatastoreCacheManager,
) -> Any:
"""Retrieve an artifact from storage and return it as a Python object
Parameters
----------
allGetInfo : `list`[`DatastoreFileGetInformation`]
Pre-processed information about each file associated with this artifact
ref : `DatasetRef`
The registry information associated with this artifact.
parameters : `Mapping`[`str`, `Any`]
`StorageClass` and `Formatter` parameters
cache_manager: `AbstractDatastoreCacheManager`
The cache manager to use for caching retrieved files
Returns
-------
python_object: `Any`
The retrieved artifact, converted to a Python object according to the
`StorageClass` specified in `ref`.
"""
refStorageClass = ref.datasetType.storageClass
refComponent = ref.datasetType.component()
# Create mapping from component name to related info
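
Putting the two helpers in this file together, a rough end-to-end sketch; the cache manager instance is assumed to be constructed elsewhere, and the argument spelling follows the docstrings above:

# End-to-end: turn file locations plus registry info into a Python object.
allGetInfo = generate_datastore_get_information(
    fileLocations, ref=ref, parameters=None
)
python_object = get_dataset_as_python_object_from_get_info(
    allGetInfo,
    ref=ref,
    parameters=None,
    cache_manager=cache_manager,  # an AbstractDatastoreCacheManager instance
)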
