Skip to content

Commit

Permalink
Merge pull request #1078 from lsst/tickets/DM-46298
Browse files Browse the repository at this point in the history
DM-46298: Make Butler.clone public
  • Loading branch information
dhirving committed Sep 16, 2024
2 parents 7d9b803 + 91556de commit da89c37
Show file tree
Hide file tree
Showing 14 changed files with 201 additions and 62 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-46298.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added `Butler.clone()`, which lets you make a copy of a Butler instance, optionally overriding default collections/run/data ID.
38 changes: 26 additions & 12 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from abc import abstractmethod
from collections.abc import Collection, Iterable, Mapping, Sequence
from contextlib import AbstractContextManager
from types import EllipsisType
from typing import TYPE_CHECKING, Any, TextIO

from lsst.resources import ResourcePath, ResourcePathExpression
Expand Down Expand Up @@ -63,7 +64,7 @@
from .datastore import DatasetRefURIs
from .dimensions import DataId, DimensionGroup, DimensionRecord
from .queries import Query
from .registry import Registry
from .registry import CollectionArgType, Registry
from .transfers import RepoExportContext

_LOG = getLogger(__name__)
Expand Down Expand Up @@ -275,9 +276,8 @@ def from_config(
collection arguments.
"""
# DirectButler used to have a way to specify a "copy constructor" by
# passing the "butler" parameter to its constructor. This
# functionality has been moved out of the constructor into
# Butler._clone(), but the new interface is not public yet.
# passing the "butler" parameter to its constructor. This has
# been moved out of the constructor into Butler.clone().
butler = kwargs.pop("butler", None)
if butler is not None:
if not isinstance(butler, Butler):
Expand All @@ -286,7 +286,7 @@ def from_config(
raise TypeError(
"Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument."
)
return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs)
return butler.clone(collections=collections, run=run, inferDefaults=inferDefaults, dataId=kwargs)

options = ButlerInstanceOptions(
collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs
Expand Down Expand Up @@ -1826,17 +1826,31 @@ def query_dimension_records(
raise EmptyQueryResultError(list(result.explain_no_results()))
return dimension_records

@abstractmethod
def _clone(
def clone(
self,
*,
collections: Any = None,
run: str | None = None,
inferDefaults: bool = True,
**kwargs: Any,
collections: CollectionArgType | None | EllipsisType = ...,
run: str | None | EllipsisType = ...,
inferDefaults: bool | EllipsisType = ...,
dataId: dict[str, str] | EllipsisType = ...,
) -> Butler:
"""Return a new Butler instance connected to the same repository
as this one, but overriding ``collections``, ``run``,
as this one, optionally overriding ``collections``, ``run``,
``inferDefaults``, and default data ID.
Parameters
----------
collections : `~lsst.daf.butler.registry.CollectionArgType` or `None`,\
optional
Same as constructor. If omitted, uses value from original object.
run : `str` or `None`, optional
Same as constructor. If `None`, no default run is used. If
omitted, copies value from original object.
inferDefaults : `bool`, optional
Same as constructor. If omitted, copies value from original
object.
dataId : `dict` [ `str` , `str` ], optional
Same as ``kwargs`` passed to the constructor. If omitted, copies
values from original object.
"""
raise NotImplementedError()
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/_labeled_butler_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def _create_direct_butler_factory(config: ButlerConfig, preload_cache: bool) ->
def create_butler(access_token: str | None) -> Butler:
# Access token is ignored because DirectButler does not use Gafaelfawr
# authentication.
return butler._clone()
return butler.clone()

return create_butler

Expand Down
19 changes: 9 additions & 10 deletions python/lsst/daf/butler/direct_butler/_direct_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import warnings
from collections import Counter, defaultdict
from collections.abc import Iterable, Iterator, MutableMapping, Sequence
from types import EllipsisType
from typing import TYPE_CHECKING, Any, ClassVar, TextIO, cast

from deprecated.sphinx import deprecated
Expand Down Expand Up @@ -89,7 +90,7 @@
from .._file_dataset import FileDataset
from ..datastore import DatasetRefURIs
from ..dimensions import DataId, DataIdValue, DimensionElement, DimensionRecord, DimensionUniverse
from ..registry import Registry
from ..registry import CollectionArgType, Registry
from ..transfers import RepoImportBackend

_LOG = getLogger(__name__)
Expand Down Expand Up @@ -182,9 +183,7 @@ def create_from_config(
if "run" in config or "collection" in config:
raise ValueError("Passing a run or collection via configuration is no longer supported.")

defaults = RegistryDefaults(
collections=options.collections, run=options.run, infer=options.inferDefaults, **options.kwargs
)
defaults = RegistryDefaults.from_butler_instance_options(options)
try:
butlerRoot = config.get("root", config.configDir)
writeable = options.writeable
Expand Down Expand Up @@ -215,16 +214,16 @@ def create_from_config(
_LOG.error(f"Failed to instantiate Butler from config {config.configFile}.")
raise

def _clone(
def clone(
self,
*,
collections: Any = None,
run: str | None = None,
inferDefaults: bool = True,
**kwargs: Any,
collections: CollectionArgType | None | EllipsisType = ...,
run: str | None | EllipsisType = ...,
inferDefaults: bool | EllipsisType = ...,
dataId: dict[str, str] | EllipsisType = ...,
) -> DirectButler:
# Docstring inherited
defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs)
defaults = self._registry.defaults.clone(collections, run, inferDefaults, dataId)
registry = self._registry.copy(defaults)

return DirectButler(
Expand Down
77 changes: 76 additions & 1 deletion python/lsst/daf/butler/registry/_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,19 @@

import contextlib
from collections.abc import Sequence, Set
from types import EllipsisType
from typing import TYPE_CHECKING, Any

from lsst.utils.classes import immutable

from .._butler_instance_options import ButlerInstanceOptions
from .._exceptions import MissingCollectionError
from ..dimensions import DataCoordinate
from ._collection_summary import CollectionSummary
from .wildcards import CollectionWildcard

if TYPE_CHECKING:
from ..registry import Registry
from ..registry import CollectionArgType, Registry
from .sql_registry import SqlRegistry


Expand Down Expand Up @@ -84,6 +86,8 @@ class RegistryDefaults:
"""

def __init__(self, collections: Any = None, run: str | None = None, infer: bool = True, **kwargs: str):
self._original_collection_was_none = collections is None
self._original_kwargs = dict(kwargs)
if collections is None:
if run is not None:
collections = (run,)
Expand All @@ -109,6 +113,77 @@ def from_data_id(data_id: DataCoordinate) -> RegistryDefaults:
defaults._finished = True
return defaults

@staticmethod
def from_butler_instance_options(options: ButlerInstanceOptions) -> RegistryDefaults:
    """Build a `RegistryDefaults` from a `ButlerInstanceOptions` object.

    Parameters
    ----------
    options : `ButlerInstanceOptions`
        Butler options object. Its ``collections``, ``run``,
        ``inferDefaults`` and ``kwargs`` attributes are forwarded to the
        `RegistryDefaults` constructor.

    Returns
    -------
    defaults : `RegistryDefaults`
        Newly constructed defaults object.
    """
    # Pull the individual settings out first so the constructor call reads
    # as a plain mapping from option names to constructor argument names
    # (note ``inferDefaults`` -> ``infer``).
    collections = options.collections
    run = options.run
    infer = options.inferDefaults
    return RegistryDefaults(collections=collections, run=run, infer=infer, **options.kwargs)

def clone(
self,
collections: CollectionArgType | None | EllipsisType = ...,
run: str | None | EllipsisType = ...,
inferDefaults: bool | EllipsisType = ...,
dataId: dict[str, str] | EllipsisType = ...,
) -> RegistryDefaults:
"""Make a copy of this RegistryDefaults object, optionally modifying
values.
Parameters
----------
collections : `~lsst.daf.butler.registry.CollectionArgType` or `None`,\
optional
Same as constructor. If omitted, uses value from original object.
run : `str` or `None`, optional
Same as constructor. If `None`, no default run is used. If
omitted, copies value from original object.
inferDefaults : `bool`, optional
Same as constructor. If omitted, copies value from original
object.
dataId : `dict` [ `str` , `str` ]
Same as ``kwargs`` arguments to constructor. If omitted, copies
values from original object.
Returns
-------
defaults : `RegistryDefaults`
New instance if any changes were made, otherwise the original
instance.
Notes
-----
``finish()`` must be called on the returned object to complete
initialization.
"""
if collections is ... and run is ... and inferDefaults is ... and dataId is ...:
# Unmodified copy -- this object is immutable so we can just return
# it and avoid the need for database queries in finish().
return self

if collections is ...:
if self._original_collection_was_none:
# Ensure that defaulting collections to the run collection
# works the same as the constructor.
collections = None
else:
collections = self.collections
if run is ...:
run = self.run
if inferDefaults is ...:
inferDefaults = self._infer
if dataId is ...:
dataId = self._original_kwargs

return RegistryDefaults(collections, run, inferDefaults, **dataId)

def __repr__(self) -> str:
collections = f"collections={self.collections!r}" if self.collections else ""
run = f"run={self.run!r}" if self.run else ""
Expand Down
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ def testCollections(self):
"""Tests for registry methods that manage collections."""
butler = self.make_butler()
registry = butler.registry
other_registry = butler._clone().registry
other_registry = butler.clone().registry
self.load_data(butler, "base.yaml", "datasets.yaml")
run1 = "imported_g"
run2 = "imported_r"
Expand Down Expand Up @@ -975,7 +975,7 @@ def _do_collection_concurrency_test(

# Set up two registries pointing to the same DB
butler1 = self.make_butler()
butler2 = butler1._clone()
butler2 = butler1.clone()
registry1 = butler1._registry
assert isinstance(registry1, SqlRegistry)
registry2 = butler2._registry
Expand Down
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/remote_butler/_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

from .._butler_config import ButlerConfig
from .._butler_instance_options import ButlerInstanceOptions
from ..registry import RegistryDefaults
from ._authentication import get_authentication_token_from_environment
from ._config import RemoteButlerConfigModel
from ._http_connection import RemoteButlerHttpConnection
Expand Down Expand Up @@ -110,7 +111,7 @@ def create_butler_for_access_token(
connection=RemoteButlerHttpConnection(
http_client=self.http_client, server_url=self.server_url, access_token=access_token
),
options=butler_options,
defaults=RegistryDefaults.from_butler_instance_options(butler_options),
cache=self._cache,
use_disabled_datastore_cache=use_disabled_datastore_cache,
)
Expand Down
28 changes: 9 additions & 19 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from collections.abc import Collection, Iterable, Iterator, Sequence
from contextlib import AbstractContextManager, contextmanager
from dataclasses import dataclass
from types import EllipsisType
from typing import TYPE_CHECKING, Any, TextIO, cast

from deprecated.sphinx import deprecated
Expand All @@ -47,7 +48,6 @@

from .._butler import Butler
from .._butler_collections import ButlerCollections
from .._butler_instance_options import ButlerInstanceOptions
from .._dataset_existence import DatasetExistence
from .._dataset_ref import DatasetId, DatasetRef
from .._dataset_type import DatasetType
Expand Down Expand Up @@ -129,7 +129,7 @@ def __new__(
cls,
*,
connection: RemoteButlerHttpConnection,
options: ButlerInstanceOptions,
defaults: RegistryDefaults,
cache: RemoteButlerCache,
use_disabled_datastore_cache: bool = True,
) -> RemoteButler:
Expand All @@ -141,7 +141,6 @@ def __new__(
self._datastore_cache_manager = None
self._use_disabled_datastore_cache = use_disabled_datastore_cache

defaults = RegistryDefaults(options.collections, options.run, options.inferDefaults, **options.kwargs)
self._registry_defaults = DefaultsHolder(defaults)
self._registry = RemoteButlerRegistry(self, self._registry_defaults, self._connection)
defaults.finish(self._registry)
Expand Down Expand Up @@ -592,25 +591,16 @@ def _normalize_collections(self, collections: CollectionArgType | None) -> Colle
collections = self.collections
return convert_collection_arg_to_glob_string_list(collections)

def _clone(
def clone(
self,
*,
collections: Any = None,
run: str | None = None,
inferDefaults: bool = True,
**kwargs: Any,
collections: CollectionArgType | None | EllipsisType = ...,
run: str | None | EllipsisType = ...,
inferDefaults: bool | EllipsisType = ...,
dataId: dict[str, str] | EllipsisType = ...,
) -> RemoteButler:
return RemoteButler(
connection=self._connection,
cache=self._cache,
options=ButlerInstanceOptions(
collections=collections,
run=run,
writeable=self.isWriteable(),
inferDefaults=inferDefaults,
kwargs=kwargs,
),
)
defaults = self._registry_defaults.get().clone(collections, run, inferDefaults, dataId)
return RemoteButler(connection=self._connection, cache=self._cache, defaults=defaults)

def __str__(self) -> str:
return f"RemoteButler({self._connection.server_url})"
Expand Down
21 changes: 11 additions & 10 deletions python/lsst/daf/butler/tests/hybrid_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

from collections.abc import Collection, Iterable, Sequence
from contextlib import AbstractContextManager
from types import EllipsisType
from typing import Any, TextIO, cast

from lsst.resources import ResourcePath, ResourcePathExpression
Expand All @@ -47,7 +48,7 @@
from ..dimensions import DataCoordinate, DataId, DimensionElement, DimensionRecord, DimensionUniverse
from ..direct_butler import DirectButler
from ..queries import Query
from ..registry import Registry
from ..registry import CollectionArgType, Registry
from ..remote_butler import RemoteButler
from ..transfers import RepoExportContext
from .hybrid_butler_collections import HybridButlerCollections
Expand Down Expand Up @@ -328,19 +329,19 @@ def registry(self) -> Registry:
def query(self) -> AbstractContextManager[Query]:
return self._remote_butler.query()

def _clone(
def clone(
self,
*,
collections: Any = None,
run: str | None = None,
inferDefaults: bool = True,
**kwargs: Any,
collections: CollectionArgType | None | EllipsisType = ...,
run: str | None | EllipsisType = ...,
inferDefaults: bool | EllipsisType = ...,
dataId: dict[str, str] | EllipsisType = ...,
) -> HybridButler:
remote_butler = self._remote_butler._clone(
collections=collections, run=run, inferDefaults=inferDefaults, **kwargs
remote_butler = self._remote_butler.clone(
collections=collections, run=run, inferDefaults=inferDefaults, dataId=dataId
)
direct_butler = self._direct_butler._clone(
collections=collections, run=run, inferDefaults=inferDefaults, **kwargs
direct_butler = self._direct_butler.clone(
collections=collections, run=run, inferDefaults=inferDefaults, dataId=dataId
)
return HybridButler(remote_butler, direct_butler)

Expand Down
Loading

0 comments on commit da89c37

Please sign in to comment.