Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-41162: Create minimalist RemoteButler client and FastAPI server #897

Merged
merged 8 commits into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions python/lsst/daf/butler/remote_butler/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from ._remote_butler import *
38 changes: 38 additions & 0 deletions python/lsst/daf/butler/remote_butler/_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might need to add __all__ to all of these files. Our convention is to define __all__ in each module and then use the "from x import *" form in __init__.py. I think it also helps Sphinx know which docs should be built, so that it doesn't get confused by other symbols imported from elsewhere.

from pydantic import AnyHttpUrl

from .._compat import _BaseModelCompat


class RemoteButlerOptionsModel(_BaseModelCompat):
    # Options describing how to reach the remote Butler server.

    # Server address; RemoteButler passes it to httpx.Client as ``base_url``.
    url: AnyHttpUrl


class RemoteButlerConfigModel(_BaseModelCompat):
    # Model for the part of the Butler configuration that RemoteButler reads.

    # The ``remote_butler`` section of the configuration.
    remote_butler: RemoteButlerOptionsModel
319 changes: 319 additions & 0 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,319 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ("RemoteButler",)

from collections.abc import Collection, Iterable, Sequence
from contextlib import AbstractContextManager
from typing import Any, TextIO

import httpx
from lsst.daf.butler import __version__
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name

from .._butler import Butler
from .._butler_config import ButlerConfig
from .._config import Config
from .._dataset_existence import DatasetExistence
from .._dataset_ref import DatasetIdGenEnum, DatasetRef
from .._dataset_type import DatasetType
from .._deferredDatasetHandle import DeferredDatasetHandle
from .._file_dataset import FileDataset
from .._limited_butler import LimitedButler
from .._storage_class import StorageClass
from ..datastore import DatasetRefURIs
from ..dimensions import DataId, DimensionConfig, DimensionUniverse
from ..registry import Registry, RegistryDefaults
from ..transfers import RepoExportContext
from ._config import RemoteButlerConfigModel


class RemoteButler(Butler):
def __init__(
self,
# These parameters are inherited from the Butler() constructor
config: Config | ResourcePathExpression | None = None,
*,
collections: Any = None,
run: str | None = None,
searchPaths: Sequence[ResourcePathExpression] | None = None,
writeable: bool | None = None,
inferDefaults: bool = True,
# Parameters unique to RemoteButler
http_client: httpx.Client | None = None,
**kwargs: Any,
):
butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a note eventually the datastore will be needed.

self._config = RemoteButlerConfigModel.model_validate(butler_config)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this validate because ButlerConfig looks like a generic mapping and it happens to have a remote_butler.url in the hierarchy? In general ButlerConfig is not a pydantic model even though we have talked about it being one. What happens to all the other parts of ButlerConfig?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's correct. Currently the other parts of ButlerConfig are being discarded since we're not using them. I figured as we added usage of the other parts of ButlerConfig, we could add the appropriate validation for them.

It's still not clear to me which parts of the existing DirectButler ButlerConfig are generated internally in the client code, which are potentially configured locally in a client-side config file, and which are only server-side concepts that the client never sees in a configuration file.

I do wonder if some portion of the configuration would be returned from a versioned "init" endpoint (which maybe gives you back some config chunks, server capabilities, dimensions, collection names, etc., all in one shot). If there is only one "config file" endpoint, that configuration format becomes an unversionable, permanent part of the API.

self._dimensions: DimensionUniverse | None = None
# TODO: RegistryDefaults should have finish() called on it, but this
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we also need to work out what to do with kwargs, since people can say instrument="LATISS", and then in the defaults we check that it is a real instrument so that we can default it in later queries that need an instrument. We can decide what to do about that later. It will fold into how Query objects work, and maybe we will pass the defaults to the Query object and let the server sort it out. I don't know if we want to pass kwargs to the server for verification/expansion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that just passing the defaults to the Query object will probably be the right thing. To stop the execution flow from getting too tangled up between the client and server, I think in general it's going to be the right approach to just record what the user asked for and give it to the server to actually do things with.

# requires getCollectionSummary() which is not yet implemented
self._registry_defaults = RegistryDefaults(collections, run, inferDefaults, **kwargs)

if http_client is not None:
# We have injected a client explicitly in to the class.
# This is generally done for testing.
self._client = http_client
else:
headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
self._client = httpx.Client(headers=headers, base_url=str(self._config.remote_butler.url))

def isWriteable(self) -> bool:
    # Docstring inherited.
    # This implementation unconditionally reports itself as not writeable.
    return False

Check warning on line 88 in python/lsst/daf/butler/remote_butler/_remote_butler.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler.py#L88

Added line #L88 was not covered by tests

@property
def dimensions(self) -> DimensionUniverse:
# Docstring inherited.
if self._dimensions is not None:
return self._dimensions

Check warning on line 94 in python/lsst/daf/butler/remote_butler/_remote_butler.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler.py#L94

Added line #L94 was not covered by tests

response = self._client.get(self._get_url("universe"))
response.raise_for_status()

config = DimensionConfig.fromString(response.text, format="json")
self._dimensions = DimensionUniverse(config)
return self._dimensions

def getDatasetType(self, name: str) -> DatasetType:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def transaction(self) -> AbstractContextManager[None]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TallJimbo I'm assuming transaction shouldn't really be in the ABC?

"""Will always raise NotImplementedError.
Transactions are not supported by RemoteButler.
"""
raise NotImplementedError()

def put(
    self,
    obj: Any,
    datasetRefOrType: DatasetRef | DatasetType | str,
    /,
    dataId: DataId | None = None,
    *,
    run: str | None = None,
    **kwargs: Any,
) -> DatasetRef:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def getDeferred(
    self,
    datasetRefOrType: DatasetRef | DatasetType | str,
    /,
    dataId: DataId | None = None,
    *,
    parameters: dict | None = None,
    collections: Any = None,
    storageClass: str | StorageClass | None = None,
    **kwargs: Any,
) -> DeferredDatasetHandle:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def get(
    self,
    datasetRefOrType: DatasetRef | DatasetType | str,
    /,
    dataId: DataId | None = None,
    *,
    parameters: dict[str, Any] | None = None,
    collections: Any = None,
    storageClass: StorageClass | str | None = None,
    **kwargs: Any,
) -> Any:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def getURIs(
    self,
    datasetRefOrType: DatasetRef | DatasetType | str,
    /,
    dataId: DataId | None = None,
    *,
    predict: bool = False,
    collections: Any = None,
    run: str | None = None,
    **kwargs: Any,
) -> DatasetRefURIs:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def getURI(
    self,
    datasetRefOrType: DatasetRef | DatasetType | str,
    /,
    dataId: DataId | None = None,
    *,
    predict: bool = False,
    collections: Any = None,
    run: str | None = None,
    **kwargs: Any,
) -> ResourcePath:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def retrieveArtifacts(
    self,
    refs: Iterable[DatasetRef],
    destination: ResourcePathExpression,
    transfer: str = "auto",
    preserve_path: bool = True,
    overwrite: bool = False,
) -> list[ResourcePath]:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def exists(
    self,
    dataset_ref_or_type: DatasetRef | DatasetType | str,
    /,
    data_id: DataId | None = None,
    *,
    full_check: bool = True,
    collections: Any = None,
    **kwargs: Any,
) -> DatasetExistence:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def _exists_many(
    self,
    refs: Iterable[DatasetRef],
    /,
    *,
    full_check: bool = True,
) -> dict[DatasetRef, DatasetExistence]:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def ingest(
    self,
    *datasets: FileDataset,
    transfer: str | None = "auto",
    run: str | None = None,
    idGenerationMode: DatasetIdGenEnum | None = None,
    record_validation_info: bool = True,
) -> None:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def export(
    self,
    *,
    directory: str | None = None,
    filename: str | None = None,
    format: str | None = None,
    transfer: str | None = None,
) -> AbstractContextManager[RepoExportContext]:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def import_(
    self,
    *,
    directory: ResourcePathExpression | None = None,
    filename: ResourcePathExpression | TextIO | None = None,
    format: str | None = None,
    transfer: str | None = None,
    skip_dimensions: set | None = None,
) -> None:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def transfer_from(
    self,
    source_butler: LimitedButler,
    source_refs: Iterable[DatasetRef],
    transfer: str = "auto",
    skip_missing: bool = True,
    register_dataset_types: bool = False,
    transfer_dimensions: bool = False,
) -> Collection[DatasetRef]:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def validateConfiguration(
self,
logFailures: bool = False,
datasetTypeNames: Iterable[str] | None = None,
ignore: Iterable[str] | None = None,
) -> None:
# Docstring inherited.
raise NotImplementedError()

@property
def collections(self) -> Sequence[str]:
    # Docstring inherited.
    # Delegates to the RegistryDefaults object built in the constructor.
    defaults = self._registry_defaults
    return defaults.collections

@property
def run(self) -> str | None:
# Docstring inherited.
return self._registry_defaults.run

@property
def registry(self) -> Registry:
    # Docstring inherited.
    # Not implemented in RemoteButler: accessing the property raises.
    raise NotImplementedError()

def pruneDatasets(
    self,
    refs: Iterable[DatasetRef],
    *,
    disassociate: bool = True,
    unstore: bool = False,
    tags: Iterable[str] = (),
    purge: bool = False,
) -> None:
    # Docstring inherited.
    # Not implemented in RemoteButler: unconditionally raises.
    raise NotImplementedError()

def _get_url(self, path: str, version: str = "v1") -> str:
"""Form the complete path to an endpoint on the server

Parameters
----------
path : `str`
The relative path to the server endpoint. Should not include the
"/butler" prefix.
version : `str`, optional
Version string to prepend to path. Defaults to "v1".

Returns
-------
path : `str`
The full path to the endpoint
"""
prefix = "butler"
return f"{prefix}/{version}/{path}"
Loading
Loading