From 840147f9196c350078f1a700304ee83a1576fe93 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Thu, 12 Sep 2024 16:45:49 -0700 Subject: [PATCH] Make Butler._clone more user-friendly Butler._clone now copies default values from the original instance unless they are explicitly overridden. --- python/lsst/daf/butler/_butler.py | 27 +++++-- .../butler/direct_butler/_direct_butler.py | 11 +-- python/lsst/daf/butler/registry/_defaults.py | 76 ++++++++++++++++++- .../lsst/daf/butler/remote_butler/_factory.py | 3 +- .../butler/remote_butler/_remote_butler.py | 24 ++---- python/lsst/daf/butler/tests/hybrid_butler.py | 9 ++- 6 files changed, 116 insertions(+), 34 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index 1630057df9..e513138af9 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -32,6 +32,7 @@ from abc import abstractmethod from collections.abc import Collection, Iterable, Mapping, Sequence from contextlib import AbstractContextManager +from types import EllipsisType from typing import TYPE_CHECKING, Any, TextIO from lsst.resources import ResourcePath, ResourcePathExpression @@ -63,7 +64,7 @@ from .datastore import DatasetRefURIs from .dimensions import DataId, DimensionGroup, DimensionRecord from .queries import Query - from .registry import Registry + from .registry import CollectionArgType, Registry from .transfers import RepoExportContext _LOG = getLogger(__name__) @@ -1826,17 +1827,31 @@ def query_dimension_records( raise EmptyQueryResultError(list(result.explain_no_results())) return dimension_records - @abstractmethod def _clone( self, *, - collections: Any = None, - run: str | None = None, - inferDefaults: bool = True, + collections: CollectionArgType | None | EllipsisType = ..., + run: str | None | EllipsisType = ..., + inferDefaults: bool | EllipsisType = ..., **kwargs: Any, ) -> Butler: """Return a new Butler instance connected to the same repository - as this one, but overriding ``collections``, ``run``, + as this one, optionally overriding ``collections``, ``run``, ``inferDefaults``, and default data ID. + + Parameters + ---------- + collections : `~lsst.daf.butler.registry.CollectionArgType` or `None`,\ + optional + Same as constructor. If omitted, uses value from original object. + run : `str` or `None`, optional + Same as constructor. If `None`, no default run is used. If + omitted, copies value from original object. + inferDefaults : `bool`, optional + Same as constructor. If omitted, copies value from original + object. + **kwargs : `str` + Same as constructor. If none are provided, copies values from + original object. """ raise NotImplementedError() diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler.py b/python/lsst/daf/butler/direct_butler/_direct_butler.py index b72df7934f..07485f585f 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler.py @@ -44,6 +44,7 @@ import warnings from collections import Counter, defaultdict from collections.abc import Iterable, Iterator, MutableMapping, Sequence +from types import EllipsisType from typing import TYPE_CHECKING, Any, ClassVar, TextIO, cast from deprecated.sphinx import deprecated @@ -89,7 +90,7 @@ from .._file_dataset import FileDataset from ..datastore import DatasetRefURIs from ..dimensions import DataId, DataIdValue, DimensionElement, DimensionRecord, DimensionUniverse - from ..registry import Registry + from ..registry import CollectionArgType, Registry from ..transfers import RepoImportBackend _LOG = getLogger(__name__) @@ -218,13 +219,13 @@ def create_from_config( def _clone( self, *, - collections: Any = None, - run: str | None = None, - inferDefaults: bool = True, + collections: CollectionArgType | None | EllipsisType = ..., + run: str | None | EllipsisType = ..., + inferDefaults: bool | EllipsisType = ..., **kwargs: Any, ) -> DirectButler: # Docstring inherited - defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs) + defaults = self._registry.defaults.clone(collections, run, inferDefaults, kwargs) registry = self._registry.copy(defaults) return DirectButler( diff --git a/python/lsst/daf/butler/registry/_defaults.py b/python/lsst/daf/butler/registry/_defaults.py index 46d194a69b..ffd8174aa2 100644 --- a/python/lsst/daf/butler/registry/_defaults.py +++ b/python/lsst/daf/butler/registry/_defaults.py @@ -31,17 +31,19 @@ import contextlib from collections.abc import Sequence, Set +from types import EllipsisType from typing import TYPE_CHECKING, Any from lsst.utils.classes import immutable +from .._butler_instance_options import ButlerInstanceOptions from .._exceptions import MissingCollectionError from ..dimensions import DataCoordinate from ._collection_summary import CollectionSummary from .wildcards import CollectionWildcard if TYPE_CHECKING: - from ..registry import Registry + from ..registry import CollectionArgType, Registry from .sql_registry import SqlRegistry @@ -84,6 +86,7 @@ class RegistryDefaults: """ def __init__(self, collections: Any = None, run: str | None = None, infer: bool = True, **kwargs: str): + self._original_collection_was_none = collections is None if collections is None: if run is not None: collections = (run,) @@ -109,6 +112,77 @@ def from_data_id(data_id: DataCoordinate) -> RegistryDefaults: defaults._finished = True return defaults + @staticmethod + def from_butler_instance_options(options: ButlerInstanceOptions) -> RegistryDefaults: + """Create a `RegistryDefaults` object from the values specified by a + `ButlerInstanceOptions` object. + + Parameters + ---------- + options : `ButlerInstanceOptions` + Butler options object. + """ + return RegistryDefaults( + collections=options.collections, run=options.run, infer=options.inferDefaults, **options.kwargs + ) + + def clone( + self, + collections: CollectionArgType | None | EllipsisType = ..., + run: str | None | EllipsisType = ..., + inferDefaults: bool | EllipsisType = ..., + dataId: dict[str, str] | EllipsisType = ..., + ) -> RegistryDefaults: + """Make a copy of this RegistryDefaults object, optionally modifying + values. + + Parameters + ---------- + collections : `~lsst.daf.butler.registry.CollectionArgType` or `None`,\ + optional + Same as constructor. If omitted, uses value from original object. + run : `str` or `None`, optional + Same as constructor. If `None`, no default run is used. If + omitted, copies value from original object. + inferDefaults : `bool`, optional + Same as constructor. If omitted, copies value from original + object. + dataId : `dict` [ `str` , `str` ] + Same as ``kwargs`` arguments to constructor. If empty or omitted, + copies values from original object. + + Returns + ------- + defaults : `RegistryDefaults` + New instance if any changes were made, otherwise the original + instance. + + Notes + ----- + ``finish()`` must be called on the returned object to complete + initialization. + """ + if collections is ... and run is ... and inferDefaults is ... and not dataId: + # Unmodified copy -- this object is immutable so we can just return + # it and avoid the need for database queries in finish(). + return self + + if collections is ...: + if self._original_collection_was_none: + # Ensure that defaulting collections to the run collection + # works the same as the constructor. + collections = None + else: + collections = self.collections + if run is ...: + run = self.run + if inferDefaults is ...: + inferDefaults = self._infer + if not dataId or dataId is ...: + kwargs = self._kwargs + + return RegistryDefaults(collections, run, inferDefaults, **kwargs) + def __repr__(self) -> str: collections = f"collections={self.collections!r}" if self.collections else "" run = f"run={self.run!r}" if self.run else "" diff --git a/python/lsst/daf/butler/remote_butler/_factory.py b/python/lsst/daf/butler/remote_butler/_factory.py index de18a5b4e5..fc7de6ef41 100644 --- a/python/lsst/daf/butler/remote_butler/_factory.py +++ b/python/lsst/daf/butler/remote_butler/_factory.py @@ -34,6 +34,7 @@ from .._butler_config import ButlerConfig from .._butler_instance_options import ButlerInstanceOptions +from ..registry import RegistryDefaults from ._authentication import get_authentication_token_from_environment from ._config import RemoteButlerConfigModel from ._http_connection import RemoteButlerHttpConnection @@ -110,7 +111,7 @@ def create_butler_for_access_token( connection=RemoteButlerHttpConnection( http_client=self.http_client, server_url=self.server_url, access_token=access_token ), - options=butler_options, + defaults=RegistryDefaults.from_butler_instance_options(butler_options), cache=self._cache, use_disabled_datastore_cache=use_disabled_datastore_cache, ) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 382a32cd93..bddac26517 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -33,6 +33,7 @@ from collections.abc import Collection, Iterable, Iterator, Sequence from contextlib import AbstractContextManager, contextmanager from dataclasses import dataclass +from types import EllipsisType from typing import TYPE_CHECKING, Any, TextIO, cast from deprecated.sphinx import deprecated @@ -47,7 +48,6 @@ from .._butler import Butler from .._butler_collections import ButlerCollections -from .._butler_instance_options import ButlerInstanceOptions from .._dataset_existence import DatasetExistence from .._dataset_ref import DatasetId, DatasetRef from .._dataset_type import DatasetType @@ -129,7 +129,7 @@ def __new__( cls, *, connection: RemoteButlerHttpConnection, - options: ButlerInstanceOptions, + defaults: RegistryDefaults, cache: RemoteButlerCache, use_disabled_datastore_cache: bool = True, ) -> RemoteButler: @@ -141,7 +141,6 @@ def __new__( self._datastore_cache_manager = None self._use_disabled_datastore_cache = use_disabled_datastore_cache - defaults = RegistryDefaults(options.collections, options.run, options.inferDefaults, **options.kwargs) self._registry_defaults = DefaultsHolder(defaults) self._registry = RemoteButlerRegistry(self, self._registry_defaults, self._connection) defaults.finish(self._registry) @@ -595,22 +594,13 @@ def _normalize_collections(self, collections: CollectionArgType | None) -> Colle def _clone( self, *, - collections: Any = None, - run: str | None = None, - inferDefaults: bool = True, + collections: CollectionArgType | None | EllipsisType = ..., + run: str | None | EllipsisType = ..., + inferDefaults: bool | EllipsisType = ..., **kwargs: Any, ) -> RemoteButler: - return RemoteButler( - connection=self._connection, - cache=self._cache, - options=ButlerInstanceOptions( - collections=collections, - run=run, - writeable=self.isWriteable(), - inferDefaults=inferDefaults, - kwargs=kwargs, - ), - ) + defaults = self._registry_defaults.get().clone(collections, run, inferDefaults, kwargs) + return RemoteButler(connection=self._connection, cache=self._cache, defaults=defaults) def __str__(self) -> str: return f"RemoteButler({self._connection.server_url})" diff --git a/python/lsst/daf/butler/tests/hybrid_butler.py b/python/lsst/daf/butler/tests/hybrid_butler.py index 6ee0256a57..5fb01da1e9 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler.py +++ b/python/lsst/daf/butler/tests/hybrid_butler.py @@ -29,6 +29,7 @@ from collections.abc import Collection, Iterable, Sequence from contextlib import AbstractContextManager +from types import EllipsisType from typing import Any, TextIO, cast from lsst.resources import ResourcePath, ResourcePathExpression @@ -47,7 +48,7 @@ from ..dimensions import DataCoordinate, DataId, DimensionElement, DimensionRecord, DimensionUniverse from ..direct_butler import DirectButler from ..queries import Query -from ..registry import Registry +from ..registry import CollectionArgType, Registry from ..remote_butler import RemoteButler from ..transfers import RepoExportContext from .hybrid_butler_collections import HybridButlerCollections @@ -331,9 +332,9 @@ def query(self) -> AbstractContextManager[Query]: def _clone( self, *, - collections: Any = None, - run: str | None = None, - inferDefaults: bool = True, + collections: CollectionArgType | None | EllipsisType = ..., + run: str | None | EllipsisType = ..., + inferDefaults: bool | EllipsisType = ..., **kwargs: Any, ) -> HybridButler: remote_butler = self._remote_butler._clone(