From 9406699743d3dac5dafe2c6b05f0ef9b044ca2a5 Mon Sep 17 00:00:00 2001
From: "David H. Irving"
Date: Wed, 29 Nov 2023 16:34:09 -0700
Subject: [PATCH] Add documentation

---
 .../daf/butler/datastores/fileDatastore.py    |  4 +-
 .../butler/datastores/fileDatastoreClient.py  | 47 +++++++++++++++++--
 .../butler/datastores/file_datastore/get.py   | 42 +++++++++++++++++
 3 files changed, 87 insertions(+), 6 deletions(-)

diff --git a/python/lsst/daf/butler/datastores/fileDatastore.py b/python/lsst/daf/butler/datastores/fileDatastore.py
index 685eee7168..cf2d879447 100644
--- a/python/lsst/daf/butler/datastores/fileDatastore.py
+++ b/python/lsst/daf/butler/datastores/fileDatastore.py
@@ -2000,7 +2000,9 @@ def prepare_get_for_external_client(self, ref: DatasetRef) -> FileDatastoreGetPayload:
 
         def to_file_info_payload(info: DatasetLocationInformation) -> FileDatastoreGetPayloadFileInfo:
             location, file_info = info
-            return FileDatastoreGetPayloadFileInfo(url=location.uri.geturl(), metadata=file_info.to_simple())
+            return FileDatastoreGetPayloadFileInfo(
+                url=location.uri.geturl(), datastoreRecords=file_info.to_simple()
+            )
 
         return FileDatastoreGetPayload(
             datastore_type="file",
diff --git a/python/lsst/daf/butler/datastores/fileDatastoreClient.py b/python/lsst/daf/butler/datastores/fileDatastoreClient.py
index 525e396b02..a13cb83851 100644
--- a/python/lsst/daf/butler/datastores/fileDatastoreClient.py
+++ b/python/lsst/daf/butler/datastores/fileDatastoreClient.py
@@ -15,34 +15,71 @@
 
 
 class FileDatastoreGetPayloadFileInfo(_BaseModelCompat):
+    """Information required to read a single file stored in `FileDatastore`."""
+
     # TODO DM-41879: Allowing arbitrary URLs here is a severe security issue,
     # since it allows the server to trick the client into fetching data from
     # any file on its local filesystem or from remote storage using credentials
     # laying around in the environment. This should be restricted to only
     # HTTP, but we don't yet have a means of mocking out HTTP gets in tests.
     url: str
-    metadata: SerializedStoredFileInfo
+    """An absolute URL that can be used to read the file."""
+
+    datastoreRecords: SerializedStoredFileInfo
+    """`FileDatastore` metadata records for this file."""
 
 
 class FileDatastoreGetPayload(_BaseModelCompat):
+    """A serializable representation of the data needed for retrieving an
+    artifact and converting it to a Python object.
+    """
+
     datastore_type: Literal["file"]
+
     file_info: list[FileDatastoreGetPayloadFileInfo]
+    """List of retrieval information for each file associated with this
+    artifact.
+    """
+
     dataset_ref: SerializedDatasetRef
+    """Registry information associated with this artifact."""
 
 
 def get_dataset_as_python_object(
-    model: FileDatastoreGetPayload,
+    payload: FileDatastoreGetPayload,
     *,
     universe: DimensionUniverse,
     parameters: Mapping[str, Any] | None,
     storageClass: StorageClass | str | None,
 ) -> Any:
+    """Retrieve an artifact from storage and return it as a Python object.
+
+    Parameters
+    ----------
+    payload : `FileDatastoreGetPayload`
+        Pre-processed information about each file associated with this
+        artifact.
+    universe : `DimensionUniverse`
+        The universe of dimensions associated with the `DatasetRef` contained
+        in `payload`.
+    parameters : `Mapping` [`str`, `Any`]
+        `StorageClass` and `Formatter` parameters to be used when converting
+        the artifact to a Python object.
+    storageClass : `StorageClass` | `str` | `None`
+        Overrides the `StorageClass` to be used when converting the artifact
+        to a Python object. If `None`, uses the `StorageClass` specified by
+        `payload`.
+
+    Returns
+    -------
+    python_object : `Any`
+        The retrieved artifact, converted to a Python object.
+    """
     fileLocations: list[DatasetLocationInformation] = [
-        (Location(None, file_info.url), StoredFileInfo.from_simple(file_info.metadata))
-        for file_info in model.file_info
+        (Location(None, file_info.url), StoredFileInfo.from_simple(file_info.datastoreRecords))
+        for file_info in payload.file_info
     ]
-    ref = DatasetRef.from_simple(model.dataset_ref, universe=universe)
+    ref = DatasetRef.from_simple(payload.dataset_ref, universe=universe)
 
     if storageClass is not None:
         ref = ref.overrideStorageClass(storageClass)
diff --git a/python/lsst/daf/butler/datastores/file_datastore/get.py b/python/lsst/daf/butler/datastores/file_datastore/get.py
index 27474c12c1..7019582e3b 100644
--- a/python/lsst/daf/butler/datastores/file_datastore/get.py
+++ b/python/lsst/daf/butler/datastores/file_datastore/get.py
@@ -84,6 +84,27 @@ def generate_datastore_get_information(
     parameters: Mapping[str, Any] | None,
     readStorageClass: StorageClass | None = None,
 ) -> list[DatastoreFileGetInformation]:
+    """Process parameters and instantiate formatters in preparation for
+    retrieving an artifact and converting it to a Python object.
+
+    Parameters
+    ----------
+    fileLocations : `list` [`DatasetLocationInformation`]
+        List of file locations for this artifact and their associated
+        datastore records.
+    ref : `DatasetRef`
+        The registry information associated with this artifact.
+    parameters : `Mapping` [`str`, `Any`]
+        `StorageClass` and `Formatter` parameters.
+    readStorageClass : `StorageClass` | `None`, optional
+        The `StorageClass` to use when ultimately returning the resulting
+        object from the get. Defaults to the `StorageClass` specified by
+        `ref`.
+
+    Returns
+    -------
+    getInfo : `list` [`DatastoreFileGetInformation`]
+        The parameters needed to retrieve each file.
+    """
     if readStorageClass is None:
         readStorageClass = ref.datasetType.storageClass
 
@@ -154,6 +175,8 @@ def _read_artifact_into_memory(
         The registry information associated with this artifact.
     isComponent : `bool`
         Flag to indicate if a component is being read from this artifact.
+    cache_manager : `AbstractDatastoreCacheManager`
+        The cache manager to use for caching retrieved files.
     cache_ref : `DatasetRef`, optional
         The DatasetRef to use when looking up the file in the cache. This
         ref must have the same ID as the supplied ref but can
@@ -316,6 +339,25 @@ def get_dataset_as_python_object_from_get_info(
     parameters: Mapping[str, Any] | None,
     cache_manager: AbstractDatastoreCacheManager,
 ) -> Any:
+    """Retrieve an artifact from storage and return it as a Python object.
+
+    Parameters
+    ----------
+    allGetInfo : `list` [`DatastoreFileGetInformation`]
+        Pre-processed information about each file associated with this
+        artifact.
+    ref : `DatasetRef`
+        The registry information associated with this artifact.
+    parameters : `Mapping` [`str`, `Any`]
+        `StorageClass` and `Formatter` parameters.
+    cache_manager : `AbstractDatastoreCacheManager`
+        The cache manager to use for caching retrieved files.
+
+    Returns
+    -------
+    python_object : `Any`
+        The retrieved artifact, converted to a Python object according to the
+        `StorageClass` specified in `ref`.
+    """
    refStorageClass = ref.datasetType.storageClass
     refComponent = ref.datasetType.component()
     # Create mapping from component name to related info
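As a usage illustration of the client entry point documented above, the following is a minimal sketch of how a caller might turn a server-produced payload into a Python object. It assumes a `payload_json` string that arrived from the server after serializing the result of `FileDatastore.prepare_get_for_external_client` (the transport step, and the `payload_json` name, are illustrative and not part of this patch), and that the `_BaseModelCompat` base exposes the pydantic-style `model_validate_json`:

    from lsst.daf.butler import DimensionUniverse
    from lsst.daf.butler.datastores.fileDatastoreClient import (
        FileDatastoreGetPayload,
        get_dataset_as_python_object,
    )

    # payload_json is assumed to have been produced on the server by
    # serializing FileDatastore.prepare_get_for_external_client(ref);
    # how it reaches the client is outside the scope of this patch.
    payload = FileDatastoreGetPayload.model_validate_json(payload_json)

    obj = get_dataset_as_python_object(
        payload,
        universe=DimensionUniverse(),
        parameters=None,    # no StorageClass/Formatter parameters
        storageClass=None,  # keep the StorageClass recorded in the payload
    )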
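And a corresponding sketch of the lower-level flow documented in file_datastore/get.py, assuming `fileLocations`, `ref`, and `cache_manager` are already in hand from the surrounding datastore code (all three are placeholders here, not defined by this patch):

    from lsst.daf.butler.datastores.file_datastore.get import (
        generate_datastore_get_information,
        get_dataset_as_python_object_from_get_info,
    )

    # fileLocations, ref, and cache_manager are assumed to be supplied by
    # the surrounding datastore code, as in get_dataset_as_python_object().
    allGetInfo = generate_datastore_get_information(
        fileLocations,
        ref=ref,
        parameters=None,  # readStorageClass defaults to ref's StorageClass
    )
    obj = get_dataset_as_python_object_from_get_info(
        allGetInfo,
        ref=ref,
        parameters=None,
        cache_manager=cache_manager,
    )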