-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DM-41162: Create minimalist RemoteButler client and FastAPI server #897
Changes from all commits
48ecfc8
9b608ce
b6b2975
81c29c5
f1fbc18
3ddba48
d344639
6f6001b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This software is dual licensed under the GNU General Public License and also | ||
# under a 3-clause BSD license. Recipients may choose which of these licenses | ||
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, | ||
# respectively. If you choose the GPL option then the following text applies | ||
# (but note that there is still no warranty even if you opt for BSD instead): | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from ._remote_butler import * |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This software is dual licensed under the GNU General Public License and also | ||
# under a 3-clause BSD license. Recipients may choose which of these licenses | ||
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, | ||
# respectively. If you choose the GPL option then the following text applies | ||
# (but note that there is still no warranty even if you opt for BSD instead): | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from pydantic import AnyHttpUrl | ||
|
||
from .._compat import _BaseModelCompat | ||
|
||
|
||
class RemoteButlerOptionsModel(_BaseModelCompat):
    """Connection options for a remote Butler server.

    This is the content of the ``remote_butler`` section of the
    configuration validated by `RemoteButlerConfigModel`.
    """

    # HTTP(S) URL of the Butler server; RemoteButler passes it (as a string)
    # to its HTTP client as the base URL for all requests.
    url: AnyHttpUrl
|
||
|
||
class RemoteButlerConfigModel(_BaseModelCompat):
    """Model used to validate the configuration given to RemoteButler.

    Only the ``remote_butler`` section is described here.
    """

    # Server connection options (currently just the server URL).
    remote_butler: RemoteButlerOptionsModel
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,319 @@ | ||
# This file is part of daf_butler. | ||
# | ||
# Developed for the LSST Data Management System. | ||
# This product includes software developed by the LSST Project | ||
# (http://www.lsst.org). | ||
# See the COPYRIGHT file at the top-level directory of this distribution | ||
# for details of code ownership. | ||
# | ||
# This software is dual licensed under the GNU General Public License and also | ||
# under a 3-clause BSD license. Recipients may choose which of these licenses | ||
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, | ||
# respectively. If you choose the GPL option then the following text applies | ||
# (but note that there is still no warranty even if you opt for BSD instead): | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
__all__ = ("RemoteButler",) | ||
|
||
from collections.abc import Collection, Iterable, Sequence | ||
from contextlib import AbstractContextManager | ||
from typing import Any, TextIO | ||
|
||
import httpx | ||
from lsst.daf.butler import __version__ | ||
from lsst.resources import ResourcePath, ResourcePathExpression | ||
from lsst.utils.introspection import get_full_type_name | ||
|
||
from .._butler import Butler | ||
from .._butler_config import ButlerConfig | ||
from .._config import Config | ||
from .._dataset_existence import DatasetExistence | ||
from .._dataset_ref import DatasetIdGenEnum, DatasetRef | ||
from .._dataset_type import DatasetType | ||
from .._deferredDatasetHandle import DeferredDatasetHandle | ||
from .._file_dataset import FileDataset | ||
from .._limited_butler import LimitedButler | ||
from .._storage_class import StorageClass | ||
from ..datastore import DatasetRefURIs | ||
from ..dimensions import DataId, DimensionConfig, DimensionUniverse | ||
from ..registry import Registry, RegistryDefaults | ||
from ..transfers import RepoExportContext | ||
from ._config import RemoteButlerConfigModel | ||
|
||
|
||
class RemoteButler(Butler):
    """Read-only `Butler` client that talks to a Butler server over HTTP.

    Only the dimension universe lookup and the default ``collections`` /
    ``run`` accessors are implemented so far; every other `Butler` operation
    raises `NotImplementedError`.

    Parameters
    ----------
    config : `Config`, `~lsst.resources.ResourcePathExpression` or `None`
        Butler configuration, passed through to `ButlerConfig`. It must
        provide a ``remote_butler`` section containing the server ``url``.
    collections : `~typing.Any`, optional
        Default collections, forwarded to `RegistryDefaults`.
    run : `str`, optional
        Default run, forwarded to `RegistryDefaults`.
    searchPaths : `~collections.abc.Sequence`, optional
        Search paths forwarded to `ButlerConfig`.
    writeable : `bool`, optional
        Accepted for compatibility with the `Butler` constructor; it is not
        used here and `isWriteable` always returns `False`.
    inferDefaults : `bool`, optional
        Forwarded to `RegistryDefaults`.
    http_client : `httpx.Client`, optional
        Pre-built HTTP client to use instead of creating one. This is
        generally done for testing.
    **kwargs : `~typing.Any`
        Forwarded to `RegistryDefaults`.
    """

    def __init__(
        self,
        # These parameters are inherited from the Butler() constructor
        config: Config | ResourcePathExpression | None = None,
        *,
        collections: Any = None,
        run: str | None = None,
        searchPaths: Sequence[ResourcePathExpression] | None = None,
        writeable: bool | None = None,
        inferDefaults: bool = True,
        # Parameters unique to RemoteButler
        http_client: httpx.Client | None = None,
        **kwargs: Any,
    ):
        # NOTE(review): the datastore configuration is skipped for now, but
        # it will eventually be needed.
        raw_config = ButlerConfig(config, searchPaths, without_datastore=True)

        # NOTE(review): only the ``remote_butler`` section is validated; per
        # the review discussion the other parts of ButlerConfig are currently
        # discarded because nothing uses them yet. Validation for them should
        # be added as they come into use.
        self._config = RemoteButlerConfigModel.model_validate(raw_config)

        # Populated on first access of the ``dimensions`` property.
        self._dimensions: DimensionUniverse | None = None

        # TODO: RegistryDefaults should have finish() called on it, but this
        # requires getCollectionSummary() which is not yet implemented
        # NOTE(review): how ``kwargs`` should be handled is still to be
        # worked out; the suggested direction is to record what the user
        # asked for and hand it to the server rather than resolve it here.
        self._registry_defaults = RegistryDefaults(collections, run, inferDefaults, **kwargs)

        if http_client is None:
            # Build our own client, identifying this class and package
            # version in the user-agent header.
            user_agent = f"{get_full_type_name(self)}/{__version__}"
            server_url = str(self._config.remote_butler.url)
            self._client = httpx.Client(headers={"user-agent": user_agent}, base_url=server_url)
        else:
            # A client was injected explicitly in to the class.
            # This is generally done for testing.
            self._client = http_client

    def isWriteable(self) -> bool:
        # Docstring inherited.
        return False

    @property
    def dimensions(self) -> DimensionUniverse:
        # Docstring inherited.
        if self._dimensions is None:
            # First access: fetch the universe definition from the server
            # and cache the result for subsequent calls.
            reply = self._client.get(self._get_url("universe"))
            reply.raise_for_status()
            universe_config = DimensionConfig.fromString(reply.text, format="json")
            self._dimensions = DimensionUniverse(universe_config)
        return self._dimensions

    def getDatasetType(self, name: str) -> DatasetType:
        # Docstring inherited.
        raise NotImplementedError()

    def transaction(self) -> AbstractContextManager[None]:
        """Raise `NotImplementedError` unconditionally.

        Transactions are not supported by RemoteButler.
        """
        raise NotImplementedError()

    def put(
        self,
        obj: Any,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRef:
        # Docstring inherited.
        raise NotImplementedError()

    def getDeferred(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict | None = None,
        collections: Any = None,
        storageClass: str | StorageClass | None = None,
        **kwargs: Any,
    ) -> DeferredDatasetHandle:
        # Docstring inherited.
        raise NotImplementedError()

    def get(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        parameters: dict[str, Any] | None = None,
        collections: Any = None,
        storageClass: StorageClass | str | None = None,
        **kwargs: Any,
    ) -> Any:
        # Docstring inherited.
        raise NotImplementedError()

    def getURIs(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> DatasetRefURIs:
        # Docstring inherited.
        raise NotImplementedError()

    def getURI(
        self,
        datasetRefOrType: DatasetRef | DatasetType | str,
        /,
        dataId: DataId | None = None,
        *,
        predict: bool = False,
        collections: Any = None,
        run: str | None = None,
        **kwargs: Any,
    ) -> ResourcePath:
        # Docstring inherited.
        raise NotImplementedError()

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePathExpression,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
    ) -> list[ResourcePath]:
        # Docstring inherited.
        raise NotImplementedError()

    def exists(
        self,
        dataset_ref_or_type: DatasetRef | DatasetType | str,
        /,
        data_id: DataId | None = None,
        *,
        full_check: bool = True,
        collections: Any = None,
        **kwargs: Any,
    ) -> DatasetExistence:
        # Docstring inherited.
        raise NotImplementedError()

    def _exists_many(
        self,
        refs: Iterable[DatasetRef],
        /,
        *,
        full_check: bool = True,
    ) -> dict[DatasetRef, DatasetExistence]:
        # Docstring inherited.
        raise NotImplementedError()

    def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def ingest(
        self,
        *datasets: FileDataset,
        transfer: str | None = "auto",
        run: str | None = None,
        idGenerationMode: DatasetIdGenEnum | None = None,
        record_validation_info: bool = True,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def export(
        self,
        *,
        directory: str | None = None,
        filename: str | None = None,
        format: str | None = None,
        transfer: str | None = None,
    ) -> AbstractContextManager[RepoExportContext]:
        # Docstring inherited.
        raise NotImplementedError()

    def import_(
        self,
        *,
        directory: ResourcePathExpression | None = None,
        filename: ResourcePathExpression | TextIO | None = None,
        format: str | None = None,
        transfer: str | None = None,
        skip_dimensions: set | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def transfer_from(
        self,
        source_butler: LimitedButler,
        source_refs: Iterable[DatasetRef],
        transfer: str = "auto",
        skip_missing: bool = True,
        register_dataset_types: bool = False,
        transfer_dimensions: bool = False,
    ) -> Collection[DatasetRef]:
        # Docstring inherited.
        raise NotImplementedError()

    def validateConfiguration(
        self,
        logFailures: bool = False,
        datasetTypeNames: Iterable[str] | None = None,
        ignore: Iterable[str] | None = None,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    @property
    def collections(self) -> Sequence[str]:
        # Docstring inherited.
        return self._registry_defaults.collections

    @property
    def run(self) -> str | None:
        # Docstring inherited.
        return self._registry_defaults.run

    @property
    def registry(self) -> Registry:
        # Docstring inherited.
        raise NotImplementedError()

    def pruneDatasets(
        self,
        refs: Iterable[DatasetRef],
        *,
        disassociate: bool = True,
        unstore: bool = False,
        tags: Iterable[str] = (),
        purge: bool = False,
    ) -> None:
        # Docstring inherited.
        raise NotImplementedError()

    def _get_url(self, path: str, version: str = "v1") -> str:
        """Form the complete path to an endpoint on the server.

        Parameters
        ----------
        path : `str`
            The relative path to the server endpoint. Should not include the
            "/butler" prefix.
        version : `str`, optional
            Version string to prepend to path. Defaults to "v1".

        Returns
        -------
        path : `str`
            The full path to the endpoint, of the form
            ``butler/<version>/<path>``.
        """
        return f"butler/{version}/{path}"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might need to add `__all__` to all these files. We do always tend to do the `__all__` in the files and then the `from x import *` variant in the `__init__.py`. I think that it also helps sphinx to know what docs should be built and doesn't get confused by other symbols being imported from elsewhere.