Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-41367: Make Butler server deployable #901

Merged
merged 5 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ dynamic = ["version"]

[project.optional-dependencies]
postgres = ["psycopg2"]
server = [
"fastapi",
"safir >= 3.4.0"
]
test = [
"pytest >= 3.2",
"pytest-openfiles >= 0.5.0",
Expand Down
98 changes: 98 additions & 0 deletions python/lsst/daf/butler/remote_butler/_authentication.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__all__ = ()

import os
from fnmatch import fnmatchcase
from urllib.parse import urlparse

_SERVER_WHITELIST = ["*.lsst.cloud"]
_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY = "BUTLER_RUBIN_ACCESS_TOKEN"
_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY = "ACCESS_TOKEN"


def get_authentication_token_from_environment(server_url: str) -> str | None:
"""Search the environment for a Rubin Science Platform access token.

The token may come from the following sources in this order:

1. The ``BUTLER_RUBIN_ACCESS_TOKEN`` environment variable.
This environment variable is meant primarily for development use,
running outside the Rubin Science Platform. This token will be sent
to EVERY server that we connect to, so be careful when connecting to
untrusted servers.
2. The ``ACCESS_TOKEN`` environment variable.
This environment variable is provided by the Rubin Science Platform
Jupyter notebooks. It will only be returned if the given ``server_url``
is in a whitelist of servers known to belong to the Rubin Science
Platform. Because this is a long-lived token that can be used to
impersonate the user with their full access rights, it should not be
sent to untrusted servers.

Parameters
----------
server_url : `str`
URL of the Butler server that the caller intends to connect to.

Returns
-------
access_token: `str` or `None`
A Rubin Science Platform access token, or `None` if no token was
configured in the environment.
"""
explicit_butler_token = os.getenv(_EXPLICIT_BUTLER_ACCESS_TOKEN_ENVIRONMENT_KEY)
if explicit_butler_token:
return explicit_butler_token

hostname = urlparse(server_url.lower()).hostname
hostname_in_whitelist = any(
(hostname and fnmatchcase(hostname, pattern) for pattern in _SERVER_WHITELIST)
)
notebook_token = os.getenv(_RSP_JUPYTER_ACCESS_TOKEN_ENVIRONMENT_KEY)
if hostname_in_whitelist and notebook_token:
return notebook_token

return None


def get_authentication_headers(access_token: str) -> dict[str, str]:
    """Build the HTTP headers that authenticate a request with Rubin
    Science Platform's Gafaelfawr service.

    Parameters
    ----------
    access_token : `str`
        Rubin Science Platform access token.

    Returns
    -------
    header_map : `dict` [`str`, `str`]
        Mapping from HTTP header name to header value.
    """
    # The token is an opaque bearer token, passed through unmodified.
    # See https://sqr-069.lsst.io/
    authorization_value = f"Bearer {access_token}"
    header_map = {"Authorization": authorization_value}
    return header_map
34 changes: 27 additions & 7 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

import httpx
from lsst.daf.butler import __version__
from lsst.daf.butler.repo_relocation import replaceRoot
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.utils.introspection import get_full_type_name

Expand All @@ -52,6 +53,7 @@
from ..registry import MissingDatasetTypeError, NoDefaultCollectionError, Registry, RegistryDefaults
from ..registry.wildcards import CollectionWildcard
from ..transfers import RepoExportContext
from ._authentication import get_authentication_headers, get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel
from .server import FindDatasetModel

Expand All @@ -69,10 +71,22 @@
inferDefaults: bool = True,
# Parameters unique to RemoteButler
http_client: httpx.Client | None = None,
access_token: str | None = None,
**kwargs: Any,
):
butler_config = ButlerConfig(config, searchPaths, without_datastore=True)
# There is a convention in Butler config files where <butlerRoot> in a
# configuration option refers to the directory containing the
# configuration file. We allow this for the remote butler's URL so
# that the server doesn't have to know which hostname it is being
# accessed from.
server_url_key = ("remote_butler", "url")
if server_url_key in butler_config:
butler_config[server_url_key] = replaceRoot(
butler_config[server_url_key], butler_config.configDir
)
self._config = RemoteButlerConfigModel.model_validate(butler_config)

self._dimensions: DimensionUniverse | None = None
# TODO: RegistryDefaults should have finish() called on it, but this
# requires getCollectionSummary() which is not yet implemented
Expand All @@ -83,8 +97,16 @@
# This is generally done for testing.
self._client = http_client
else:
server_url = str(self._config.remote_butler.url)
auth_headers = {}
if access_token is None:
access_token = get_authentication_token_from_environment(server_url)
if access_token is not None:
auth_headers = get_authentication_headers(access_token)

Check warning on line 105 in python/lsst/daf/butler/remote_butler/_remote_butler.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/_remote_butler.py#L105

Added line #L105 was not covered by tests

headers = {"user-agent": f"{get_full_type_name(self)}/{__version__}"}
self._client = httpx.Client(headers=headers, base_url=str(self._config.remote_butler.url))
headers.update(auth_headers)
self._client = httpx.Client(headers=headers, base_url=server_url)

def isWriteable(self) -> bool:
# Docstring inherited.
Expand Down Expand Up @@ -420,20 +442,18 @@
raise NotImplementedError()

def _get_url(self, path: str, version: str = "v1") -> str:
"""Form the complete path to an endpoint on the server
"""Form the complete path to an endpoint on the server.

Parameters
----------
path : `str`
The relative path to the server endpoint. Should not include the
"/butler" prefix.
The relative path to the server endpoint.
version : `str`, optional
Version string to prepend to path. Defaults to "v1".

Returns
-------
path : `str`
The full path to the endpoint
The full path to the endpoint.
"""
prefix = "butler"
return f"{prefix}/{version}/{path}"
return f"{version}/{path}"
43 changes: 43 additions & 0 deletions python/lsst/daf/butler/remote_butler/server/_dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from functools import cache

from lsst.daf.butler import Butler

from ._config import get_config_from_env
from ._factory import Factory


@cache
def _make_global_butler() -> Butler:
    """Return the Butler instance built from the environment configuration.

    The function is wrapped in `functools.cache`, so the configuration is
    read and the Butler constructed only on the first call; later calls
    return the same object.

    Returns
    -------
    butler : `Butler`
        Butler created from the ``config_uri`` attribute of the configuration
        returned by ``get_config_from_env``.
    """
    return Butler.from_config(get_config_from_env().config_uri)

Check warning on line 39 in python/lsst/daf/butler/remote_butler/server/_dependencies.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/server/_dependencies.py#L38-L39

Added lines #L38 - L39 were not covered by tests


def factory_dependency() -> Factory:
    """Create a `Factory` bound to the cached global Butler.

    Returns
    -------
    factory : `Factory`
        A new `Factory` constructed with the Butler returned by
        ``_make_global_butler``.
    """
    # NOTE(review): presumably injected into request handlers as a FastAPI
    # dependency -- confirm against the handlers that use it.
    shared_butler = _make_global_butler()
    return Factory(butler=shared_butler)

Check warning on line 43 in python/lsst/daf/butler/remote_butler/server/_dependencies.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/remote_butler/server/_dependencies.py#L43

Added line #L43 was not covered by tests
timj marked this conversation as resolved.
Show resolved Hide resolved
Loading
Loading