Skip to content

Commit

Permalink
Make Butler._clone more user-friendly
Browse files Browse the repository at this point in the history
Butler._clone now copies default values from the original instance unless they are explicitly overridden.
  • Loading branch information
dhirving committed Sep 12, 2024
1 parent 7d9b803 commit 840147f
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 34 deletions.
27 changes: 21 additions & 6 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from abc import abstractmethod
from collections.abc import Collection, Iterable, Mapping, Sequence
from contextlib import AbstractContextManager
from types import EllipsisType
from typing import TYPE_CHECKING, Any, TextIO

from lsst.resources import ResourcePath, ResourcePathExpression
Expand Down Expand Up @@ -63,7 +64,7 @@
from .datastore import DatasetRefURIs
from .dimensions import DataId, DimensionGroup, DimensionRecord
from .queries import Query
from .registry import Registry
from .registry import CollectionArgType, Registry
from .transfers import RepoExportContext

_LOG = getLogger(__name__)
Expand Down Expand Up @@ -1826,17 +1827,31 @@ def query_dimension_records(
raise EmptyQueryResultError(list(result.explain_no_results()))
return dimension_records

@abstractmethod
def _clone(
self,
*,
collections: Any = None,
run: str | None = None,
inferDefaults: bool = True,
collections: CollectionArgType | None | EllipsisType = ...,
run: str | None | EllipsisType = ...,
inferDefaults: bool | EllipsisType = ...,
**kwargs: Any,
) -> Butler:
"""Return a new Butler instance connected to the same repository
as this one, but overriding ``collections``, ``run``,
as this one, optionally overriding ``collections``, ``run``,
``inferDefaults``, and default data ID.
Parameters
----------
collections : `~lsst.daf.butler.registry.CollectionArgType` or `None`,\
optional
Same as constructor. If omitted, uses value from original object.
run : `str` or `None`, optional
Same as constructor. If `None`, no default run is used. If
omitted, copies value from original object.
inferDefaults : `bool`, optional
Same as constructor. If omitted, copies value from original
object.
**kwargs : `str`
Same as constructor. If none are provided, copies values from
original object.
"""
raise NotImplementedError()
11 changes: 6 additions & 5 deletions python/lsst/daf/butler/direct_butler/_direct_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import warnings
from collections import Counter, defaultdict
from collections.abc import Iterable, Iterator, MutableMapping, Sequence
from types import EllipsisType
from typing import TYPE_CHECKING, Any, ClassVar, TextIO, cast

from deprecated.sphinx import deprecated
Expand Down Expand Up @@ -89,7 +90,7 @@
from .._file_dataset import FileDataset
from ..datastore import DatasetRefURIs
from ..dimensions import DataId, DataIdValue, DimensionElement, DimensionRecord, DimensionUniverse
from ..registry import Registry
from ..registry import CollectionArgType, Registry
from ..transfers import RepoImportBackend

_LOG = getLogger(__name__)
Expand Down Expand Up @@ -218,13 +219,13 @@ def create_from_config(
def _clone(
self,
*,
collections: Any = None,
run: str | None = None,
inferDefaults: bool = True,
collections: CollectionArgType | None | EllipsisType = ...,
run: str | None | EllipsisType = ...,
inferDefaults: bool | EllipsisType = ...,
**kwargs: Any,
) -> DirectButler:
# Docstring inherited
defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs)
defaults = self._registry.defaults.clone(collections, run, inferDefaults, kwargs)
registry = self._registry.copy(defaults)

return DirectButler(
Expand Down
76 changes: 75 additions & 1 deletion python/lsst/daf/butler/registry/_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,19 @@

import contextlib
from collections.abc import Sequence, Set
from types import EllipsisType
from typing import TYPE_CHECKING, Any

from lsst.utils.classes import immutable

from .._butler_instance_options import ButlerInstanceOptions
from .._exceptions import MissingCollectionError
from ..dimensions import DataCoordinate
from ._collection_summary import CollectionSummary
from .wildcards import CollectionWildcard

if TYPE_CHECKING:
from ..registry import Registry
from ..registry import CollectionArgType, Registry
from .sql_registry import SqlRegistry


Expand Down Expand Up @@ -84,6 +86,7 @@ class RegistryDefaults:
"""

def __init__(self, collections: Any = None, run: str | None = None, infer: bool = True, **kwargs: str):
self._original_collection_was_none = collections is None
if collections is None:
if run is not None:
collections = (run,)
Expand All @@ -109,6 +112,77 @@ def from_data_id(data_id: DataCoordinate) -> RegistryDefaults:
defaults._finished = True
return defaults

@staticmethod
def from_butler_instance_options(options: ButlerInstanceOptions) -> RegistryDefaults:
"""Create a `RegistryDefaults` object from the values specified by a
`ButlerInstanceOptions` object.
Parameters
----------
options : `ButlerInstanceOptions`
Butler options object.
"""
return RegistryDefaults(
collections=options.collections, run=options.run, infer=options.inferDefaults, **options.kwargs
)

def clone(
self,
collections: CollectionArgType | None | EllipsisType = ...,
run: str | None | EllipsisType = ...,
inferDefaults: bool | EllipsisType = ...,
dataId: dict[str, str] | EllipsisType = ...,
) -> RegistryDefaults:
"""Make a copy of this RegistryDefaults object, optionally modifying
values.
Parameters
----------
collections : `~lsst.daf.butler.registry.CollectionArgType` or `None`,\
optional
Same as constructor. If omitted, uses value from original object.
run : `str` or `None`, optional
Same as constructor. If `None`, no default run is used. If
omitted, copies value from original object.
inferDefaults : `bool`, optional
Same as constructor. If omitted, copies value from original
object.
dataId : `dict` [ `str` , `str` ]
Same as ``kwargs`` arguments to constructor. If empty or omitted,
copies values from original object.
Returns
-------
defaults : `RegistryDefaults`
New instance if any changes were made, otherwise the original
instance.
Notes
-----
``finish()`` must be called on the returned object to complete
initialization.
"""
if collections is ... and run is ... and inferDefaults is ... and not dataId:
# Unmodified copy -- this object is immutable so we can just return
# it and avoid the need for database queries in finish().
return self

if collections is ...:
if self._original_collection_was_none:
# Ensure that defaulting collections to the run collection
# works the same as the constructor.
collections = None
else:
collections = self.collections
if run is ...:
run = self.run
if inferDefaults is ...:
inferDefaults = self._infer
if not dataId or dataId is ...:
kwargs = self._kwargs

return RegistryDefaults(collections, run, inferDefaults, **kwargs)

def __repr__(self) -> str:
collections = f"collections={self.collections!r}" if self.collections else ""
run = f"run={self.run!r}" if self.run else ""
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/remote_butler/_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

from .._butler_config import ButlerConfig
from .._butler_instance_options import ButlerInstanceOptions
from ..registry import RegistryDefaults
from ._authentication import get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel
from ._http_connection import RemoteButlerHttpConnection
Expand Down Expand Up @@ -110,7 +111,7 @@ def create_butler_for_access_token(
connection=RemoteButlerHttpConnection(
http_client=self.http_client, server_url=self.server_url, access_token=access_token
),
options=butler_options,
defaults=RegistryDefaults.from_butler_instance_options(butler_options),
cache=self._cache,
use_disabled_datastore_cache=use_disabled_datastore_cache,
)
Expand Down
24 changes: 7 additions & 17 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from collections.abc import Collection, Iterable, Iterator, Sequence
from contextlib import AbstractContextManager, contextmanager
from dataclasses import dataclass
from types import EllipsisType
from typing import TYPE_CHECKING, Any, TextIO, cast

from deprecated.sphinx import deprecated
Expand All @@ -47,7 +48,6 @@

from .._butler import Butler
from .._butler_collections import ButlerCollections
from .._butler_instance_options import ButlerInstanceOptions
from .._dataset_existence import DatasetExistence
from .._dataset_ref import DatasetId, DatasetRef
from .._dataset_type import DatasetType
Expand Down Expand Up @@ -129,7 +129,7 @@ def __new__(
cls,
*,
connection: RemoteButlerHttpConnection,
options: ButlerInstanceOptions,
defaults: RegistryDefaults,
cache: RemoteButlerCache,
use_disabled_datastore_cache: bool = True,
) -> RemoteButler:
Expand All @@ -141,7 +141,6 @@ def __new__(
self._datastore_cache_manager = None
self._use_disabled_datastore_cache = use_disabled_datastore_cache

defaults = RegistryDefaults(options.collections, options.run, options.inferDefaults, **options.kwargs)
self._registry_defaults = DefaultsHolder(defaults)
self._registry = RemoteButlerRegistry(self, self._registry_defaults, self._connection)
defaults.finish(self._registry)
Expand Down Expand Up @@ -595,22 +594,13 @@ def _normalize_collections(self, collections: CollectionArgType | None) -> Colle
def _clone(
self,
*,
collections: Any = None,
run: str | None = None,
inferDefaults: bool = True,
collections: CollectionArgType | None | EllipsisType = ...,
run: str | None | EllipsisType = ...,
inferDefaults: bool | EllipsisType = ...,
**kwargs: Any,
) -> RemoteButler:
return RemoteButler(
connection=self._connection,
cache=self._cache,
options=ButlerInstanceOptions(
collections=collections,
run=run,
writeable=self.isWriteable(),
inferDefaults=inferDefaults,
kwargs=kwargs,
),
)
defaults = self._registry_defaults.get().clone(collections, run, inferDefaults, kwargs)
return RemoteButler(connection=self._connection, cache=self._cache, defaults=defaults)

def __str__(self) -> str:
return f"RemoteButler({self._connection.server_url})"
Expand Down
9 changes: 5 additions & 4 deletions python/lsst/daf/butler/tests/hybrid_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

from collections.abc import Collection, Iterable, Sequence
from contextlib import AbstractContextManager
from types import EllipsisType
from typing import Any, TextIO, cast

from lsst.resources import ResourcePath, ResourcePathExpression
Expand All @@ -47,7 +48,7 @@
from ..dimensions import DataCoordinate, DataId, DimensionElement, DimensionRecord, DimensionUniverse
from ..direct_butler import DirectButler
from ..queries import Query
from ..registry import Registry
from ..registry import CollectionArgType, Registry
from ..remote_butler import RemoteButler
from ..transfers import RepoExportContext
from .hybrid_butler_collections import HybridButlerCollections
Expand Down Expand Up @@ -331,9 +332,9 @@ def query(self) -> AbstractContextManager[Query]:
def _clone(
self,
*,
collections: Any = None,
run: str | None = None,
inferDefaults: bool = True,
collections: CollectionArgType | None | EllipsisType = ...,
run: str | None | EllipsisType = ...,
inferDefaults: bool | EllipsisType = ...,
**kwargs: Any,
) -> HybridButler:
remote_butler = self._remote_butler._clone(
Expand Down

0 comments on commit 840147f

Please sign in to comment.