From 57cb9c8f120e0e80da17e788eef9dc8e94305932 Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Thu, 12 Sep 2024 16:45:49 -0700 Subject: [PATCH 1/2] Make Butler._clone more user-friendly Butler._clone now copies default values from the original instance unless they are explicitly overridden. --- python/lsst/daf/butler/_butler.py | 31 ++++++-- .../butler/direct_butler/_direct_butler.py | 17 ++-- python/lsst/daf/butler/registry/_defaults.py | 77 ++++++++++++++++++- .../lsst/daf/butler/remote_butler/_factory.py | 3 +- .../butler/remote_butler/_remote_butler.py | 26 ++----- python/lsst/daf/butler/tests/hybrid_butler.py | 15 ++-- tests/test_simpleButler.py | 36 +++++++++ 7 files changed, 161 insertions(+), 44 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index 1630057df9..f35daa0de5 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -32,6 +32,7 @@ from abc import abstractmethod from collections.abc import Collection, Iterable, Mapping, Sequence from contextlib import AbstractContextManager +from types import EllipsisType from typing import TYPE_CHECKING, Any, TextIO from lsst.resources import ResourcePath, ResourcePathExpression @@ -63,7 +64,7 @@ from .datastore import DatasetRefURIs from .dimensions import DataId, DimensionGroup, DimensionRecord from .queries import Query - from .registry import Registry + from .registry import CollectionArgType, Registry from .transfers import RepoExportContext _LOG = getLogger(__name__) @@ -286,7 +287,7 @@ def from_config( raise TypeError( "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." ) - return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, **kwargs) + return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, dataId=kwargs) options = ButlerInstanceOptions( collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs @@ -1826,17 +1827,31 @@ def query_dimension_records( raise EmptyQueryResultError(list(result.explain_no_results())) return dimension_records - @abstractmethod def _clone( self, *, - collections: Any = None, - run: str | None = None, - inferDefaults: bool = True, - **kwargs: Any, + collections: CollectionArgType | None | EllipsisType = ..., + run: str | None | EllipsisType = ..., + inferDefaults: bool | EllipsisType = ..., + dataId: dict[str, str] | EllipsisType = ..., ) -> Butler: """Return a new Butler instance connected to the same repository - as this one, but overriding ``collections``, ``run``, + as this one, optionally overriding ``collections``, ``run``, ``inferDefaults``, and default data ID. + + Parameters + ---------- + collections : `~lsst.daf.butler.registry.CollectionArgType` or `None`,\ + optional + Same as constructor. If omitted, uses value from original object. + run : `str` or `None`, optional + Same as constructor. If `None`, no default run is used. If + omitted, copies value from original object. + inferDefaults : `bool`, optional + Same as constructor. If omitted, copies value from original + object. + dataId : `str` + Same as ``kwargs`` passed to the constructor. If omitted, copies + values from original object. """ raise NotImplementedError() diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler.py b/python/lsst/daf/butler/direct_butler/_direct_butler.py index b72df7934f..e1be115db1 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler.py @@ -44,6 +44,7 @@ import warnings from collections import Counter, defaultdict from collections.abc import Iterable, Iterator, MutableMapping, Sequence +from types import EllipsisType from typing import TYPE_CHECKING, Any, ClassVar, TextIO, cast from deprecated.sphinx import deprecated @@ -89,7 +90,7 @@ from .._file_dataset import FileDataset from ..datastore import DatasetRefURIs from ..dimensions import DataId, DataIdValue, DimensionElement, DimensionRecord, DimensionUniverse - from ..registry import Registry + from ..registry import CollectionArgType, Registry from ..transfers import RepoImportBackend _LOG = getLogger(__name__) @@ -182,9 +183,7 @@ def create_from_config( if "run" in config or "collection" in config: raise ValueError("Passing a run or collection via configuration is no longer supported.") - defaults = RegistryDefaults( - collections=options.collections, run=options.run, infer=options.inferDefaults, **options.kwargs - ) + defaults = RegistryDefaults.from_butler_instance_options(options) try: butlerRoot = config.get("root", config.configDir) writeable = options.writeable @@ -218,13 +217,13 @@ def create_from_config( def _clone( self, *, - collections: Any = None, - run: str | None = None, - inferDefaults: bool = True, - **kwargs: Any, + collections: CollectionArgType | None | EllipsisType = ..., + run: str | None | EllipsisType = ..., + inferDefaults: bool | EllipsisType = ..., + dataId: dict[str, str] | EllipsisType = ..., ) -> DirectButler: # Docstring inherited - defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs) + defaults = self._registry.defaults.clone(collections, run, inferDefaults, dataId) registry = self._registry.copy(defaults) return DirectButler( diff --git a/python/lsst/daf/butler/registry/_defaults.py b/python/lsst/daf/butler/registry/_defaults.py index 46d194a69b..aab6ce7598 100644 --- a/python/lsst/daf/butler/registry/_defaults.py +++ b/python/lsst/daf/butler/registry/_defaults.py @@ -31,17 +31,19 @@ import contextlib from collections.abc import Sequence, Set +from types import EllipsisType from typing import TYPE_CHECKING, Any from lsst.utils.classes import immutable +from .._butler_instance_options import ButlerInstanceOptions from .._exceptions import MissingCollectionError from ..dimensions import DataCoordinate from ._collection_summary import CollectionSummary from .wildcards import CollectionWildcard if TYPE_CHECKING: - from ..registry import Registry + from ..registry import CollectionArgType, Registry from .sql_registry import SqlRegistry @@ -84,6 +86,8 @@ class RegistryDefaults: """ def __init__(self, collections: Any = None, run: str | None = None, infer: bool = True, **kwargs: str): + self._original_collection_was_none = collections is None + self._original_kwargs = dict(kwargs) if collections is None: if run is not None: collections = (run,) @@ -109,6 +113,77 @@ def from_data_id(data_id: DataCoordinate) -> RegistryDefaults: defaults._finished = True return defaults + @staticmethod + def from_butler_instance_options(options: ButlerInstanceOptions) -> RegistryDefaults: + """Create a `RegistryDefaults` object from the values specified by a + `ButlerInstanceOptions` object. + + Parameters + ---------- + options : `ButlerInstanceOptions` + Butler options object. + """ + return RegistryDefaults( + collections=options.collections, run=options.run, infer=options.inferDefaults, **options.kwargs + ) + + def clone( + self, + collections: CollectionArgType | None | EllipsisType = ..., + run: str | None | EllipsisType = ..., + inferDefaults: bool | EllipsisType = ..., + dataId: dict[str, str] | EllipsisType = ..., + ) -> RegistryDefaults: + """Make a copy of this RegistryDefaults object, optionally modifying + values. + + Parameters + ---------- + collections : `~lsst.daf.butler.registry.CollectionArgType` or `None`,\ + optional + Same as constructor. If omitted, uses value from original object. + run : `str` or `None`, optional + Same as constructor. If `None`, no default run is used. If + omitted, copies value from original object. + inferDefaults : `bool`, optional + Same as constructor. If omitted, copies value from original + object. + dataId : `dict` [ `str` , `str` ] + Same as ``kwargs`` arguments to constructor. If omitted, copies + values from original object. + + Returns + ------- + defaults : `RegistryDefaults` + New instance if any changes were made, otherwise the original + instance. + + Notes + ----- + ``finish()`` must be called on the returned object to complete + initialization. + """ + if collections is ... and run is ... and inferDefaults is ... and dataId is ...: + # Unmodified copy -- this object is immutable so we can just return + # it and avoid the need for database queries in finish(). + return self + + if collections is ...: + if self._original_collection_was_none: + # Ensure that defaulting collections to the run collection + # works the same as the constructor. + collections = None + else: + collections = self.collections + if run is ...: + run = self.run + if inferDefaults is ...: + inferDefaults = self._infer + if dataId is ...: + dataId = self._original_kwargs + + return RegistryDefaults(collections, run, inferDefaults, **dataId) + def __repr__(self) -> str: collections = f"collections={self.collections!r}" if self.collections else "" run = f"run={self.run!r}" if self.run else "" diff --git a/python/lsst/daf/butler/remote_butler/_factory.py b/python/lsst/daf/butler/remote_butler/_factory.py index de18a5b4e5..fc7de6ef41 100644 --- a/python/lsst/daf/butler/remote_butler/_factory.py +++ b/python/lsst/daf/butler/remote_butler/_factory.py @@ -34,6 +34,7 @@ from .._butler_config import ButlerConfig from .._butler_instance_options import ButlerInstanceOptions +from ..registry import RegistryDefaults from ._authentication import get_authentication_token_from_environment from ._config import RemoteButlerConfigModel from ._http_connection import RemoteButlerHttpConnection @@ -110,7 +111,7 @@ def create_butler_for_access_token( connection=RemoteButlerHttpConnection( http_client=self.http_client, server_url=self.server_url, access_token=access_token ), - options=butler_options, + defaults=RegistryDefaults.from_butler_instance_options(butler_options), cache=self._cache, use_disabled_datastore_cache=use_disabled_datastore_cache, ) diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 382a32cd93..1e752c23a5 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -33,6 +33,7 @@ from collections.abc import Collection, Iterable, Iterator, Sequence from contextlib import AbstractContextManager, contextmanager from dataclasses import dataclass +from types import EllipsisType from typing import TYPE_CHECKING, Any, TextIO, cast from deprecated.sphinx import deprecated @@ -47,7 +48,6 @@ from .._butler import Butler from .._butler_collections import ButlerCollections -from .._butler_instance_options import ButlerInstanceOptions from .._dataset_existence import DatasetExistence from .._dataset_ref import DatasetId, DatasetRef from .._dataset_type import DatasetType @@ -129,7 +129,7 @@ def __new__( cls, *, connection: RemoteButlerHttpConnection, - options: ButlerInstanceOptions, + defaults: RegistryDefaults, cache: RemoteButlerCache, use_disabled_datastore_cache: bool = True, ) -> RemoteButler: @@ -141,7 +141,6 @@ def __new__( self._datastore_cache_manager = None self._use_disabled_datastore_cache = use_disabled_datastore_cache - defaults = RegistryDefaults(options.collections, options.run, options.inferDefaults, **options.kwargs) self._registry_defaults = DefaultsHolder(defaults) self._registry = RemoteButlerRegistry(self, self._registry_defaults, self._connection) defaults.finish(self._registry) @@ -595,22 +594,13 @@ def _normalize_collections(self, collections: CollectionArgType | None) -> Colle def _clone( self, *, - collections: Any = None, - run: str | None = None, - inferDefaults: bool = True, - **kwargs: Any, + collections: CollectionArgType | None | EllipsisType = ..., + run: str | None | EllipsisType = ..., + inferDefaults: bool | EllipsisType = ..., + dataId: dict[str, str] | EllipsisType = ..., ) -> RemoteButler: - return RemoteButler( - connection=self._connection, - cache=self._cache, - options=ButlerInstanceOptions( - collections=collections, - run=run, - writeable=self.isWriteable(), - inferDefaults=inferDefaults, - kwargs=kwargs, - ), - ) + defaults = self._registry_defaults.get().clone(collections, run, inferDefaults, dataId) + return RemoteButler(connection=self._connection, cache=self._cache, defaults=defaults) def __str__(self) -> str: return f"RemoteButler({self._connection.server_url})" diff --git a/python/lsst/daf/butler/tests/hybrid_butler.py b/python/lsst/daf/butler/tests/hybrid_butler.py index 6ee0256a57..652b519e35 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler.py +++ b/python/lsst/daf/butler/tests/hybrid_butler.py @@ -29,6 +29,7 @@ from collections.abc import Collection, Iterable, Sequence from contextlib import AbstractContextManager +from types import EllipsisType from typing import Any, TextIO, cast from lsst.resources import ResourcePath, ResourcePathExpression @@ -47,7 +48,7 @@ from ..dimensions import DataCoordinate, DataId, DimensionElement, DimensionRecord, DimensionUniverse from ..direct_butler import DirectButler from ..queries import Query -from ..registry import Registry +from ..registry import CollectionArgType, Registry from ..remote_butler import RemoteButler from ..transfers import RepoExportContext from .hybrid_butler_collections import HybridButlerCollections @@ -331,16 +332,16 @@ def query(self) -> AbstractContextManager[Query]: def _clone( self, *, - collections: Any = None, - run: str | None = None, - inferDefaults: bool = True, - **kwargs: Any, + collections: CollectionArgType | None | EllipsisType = ..., + run: str | None | EllipsisType = ..., + inferDefaults: bool | EllipsisType = ..., + dataId: dict[str, str] | EllipsisType = ..., ) -> HybridButler: remote_butler = self._remote_butler._clone( - collections=collections, run=run, inferDefaults=inferDefaults, **kwargs + collections=collections, run=run, inferDefaults=inferDefaults, dataId=dataId ) direct_butler = self._direct_butler._clone( - collections=collections, run=run, inferDefaults=inferDefaults, **kwargs + collections=collections, run=run, inferDefaults=inferDefaults, dataId=dataId ) return HybridButler(remote_butler, direct_butler) diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py index 0ef43f5e5f..261d1d9221 100644 --- a/tests/test_simpleButler.py +++ b/tests/test_simpleButler.py @@ -762,6 +762,42 @@ def test_skypix_templates(self): path = file_template.format(ref) self.assertEqual(path, "test/warp/HSCA02713600_HSC_12345_12345_12345_12345") + def test_clone(self): + # This just tests that the default-overriding logic works as expected. + # The actual internals are tested in test_butler.py, in + # ClonedSqliteButlerTestCase and + # ClonedPostgresPosixDatastoreButlerTestCase. + + butler = self.makeButler(writeable=True) + butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) + butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml")) + + # Original butler was created with the default arguments: + # collections = None + # run = None + # inferDefaults = True + # no explicit default data ID + + clone1 = butler._clone(collections="imported_g") + self.assertEqual(clone1.registry.defaults.dataId, {"instrument": "Cam1"}) + self.assertCountEqual(clone1.registry.defaults.collections, ["imported_g"]) + self.assertIsNone(clone1.run) + + clone2 = clone1._clone(inferDefaults=False) + self.assertEqual(clone2.registry.defaults.dataId, {}) + self.assertCountEqual(clone2.registry.defaults.collections, ["imported_g"]) + self.assertIsNone(clone2.run) + + clone3 = clone2._clone(run="imported_r") + self.assertEqual(clone3.registry.defaults.dataId, {}) + self.assertCountEqual(clone3.registry.defaults.collections, ["imported_g"]) + self.assertEqual(clone3.run, "imported_r") + + clone4 = butler._clone(run="imported_r") + self.assertEqual(clone4.registry.defaults.dataId, {"instrument": "Cam1"}) + self.assertCountEqual(clone4.registry.defaults.collections, ["imported_r"]) + self.assertEqual(clone4.run, "imported_r") + class DirectSimpleButlerTestCase(SimpleButlerTests, unittest.TestCase): """Run tests against DirectButler implementation.""" From 91556de9a1b00623a187fc0d79ae87c399a6da4b Mon Sep 17 00:00:00 2001 From: "David H. Irving" Date: Fri, 13 Sep 2024 10:43:13 -0700 Subject: [PATCH 2/2] Make Butler._clone public as Butler.clone --- doc/changes/DM-46298.feature.md | 1 + python/lsst/daf/butler/_butler.py | 9 +++--- .../daf/butler/_labeled_butler_factory.py | 2 +- .../butler/direct_butler/_direct_butler.py | 2 +- .../daf/butler/registry/tests/_registry.py | 4 +-- .../butler/remote_butler/_remote_butler.py | 2 +- python/lsst/daf/butler/tests/hybrid_butler.py | 6 ++-- python/lsst/daf/butler/tests/utils.py | 2 +- tests/test_butler.py | 6 ++-- tests/test_server.py | 2 +- tests/test_simpleButler.py | 30 ++++++++++++++++--- tests/test_sqlite.py | 2 +- 12 files changed, 45 insertions(+), 23 deletions(-) create mode 100644 doc/changes/DM-46298.feature.md diff --git a/doc/changes/DM-46298.feature.md b/doc/changes/DM-46298.feature.md new file mode 100644 index 0000000000..3ebd0bc36b --- /dev/null +++ b/doc/changes/DM-46298.feature.md @@ -0,0 +1 @@ +Added `Butler.clone()`, which lets you make a copy of a Butler instance, optionally overriding default collections/run/data ID. diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index f35daa0de5..c06935bea0 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -276,9 +276,8 @@ def from_config( collection arguments. """ # DirectButler used to have a way to specify a "copy constructor" by - # passing the "butler" parameter to its constructor. This - # functionality has been moved out of the constructor into - # Butler._clone(), but the new interface is not public yet. + # passing the "butler" parameter to its constructor. This has + # been moved out of the constructor into Butler.clone(). butler = kwargs.pop("butler", None) if butler is not None: if not isinstance(butler, Butler): @@ -287,7 +286,7 @@ def from_config( raise TypeError( "Cannot pass 'config', 'searchPaths', or 'writeable' arguments with 'butler' argument." ) - return butler._clone(collections=collections, run=run, inferDefaults=inferDefaults, dataId=kwargs) + return butler.clone(collections=collections, run=run, inferDefaults=inferDefaults, dataId=kwargs) options = ButlerInstanceOptions( collections=collections, run=run, writeable=writeable, inferDefaults=inferDefaults, kwargs=kwargs @@ -1827,7 +1826,7 @@ def query_dimension_records( raise EmptyQueryResultError(list(result.explain_no_results())) return dimension_records - def _clone( + def clone( self, *, collections: CollectionArgType | None | EllipsisType = ..., diff --git a/python/lsst/daf/butler/_labeled_butler_factory.py b/python/lsst/daf/butler/_labeled_butler_factory.py index 3b9375544f..40f887ed0a 100644 --- a/python/lsst/daf/butler/_labeled_butler_factory.py +++ b/python/lsst/daf/butler/_labeled_butler_factory.py @@ -165,7 +165,7 @@ def _create_direct_butler_factory(config: ButlerConfig, preload_cache: bool) -> def create_butler(access_token: str | None) -> Butler: # Access token is ignored because DirectButler does not use Gafaelfawr # authentication. - return butler._clone() + return butler.clone() return create_butler diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler.py b/python/lsst/daf/butler/direct_butler/_direct_butler.py index e1be115db1..c689931999 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler.py @@ -214,7 +214,7 @@ def create_from_config( _LOG.error(f"Failed to instantiate Butler from config {config.configFile}.") raise - def _clone( + def clone( self, *, collections: CollectionArgType | None | EllipsisType = ..., diff --git a/python/lsst/daf/butler/registry/tests/_registry.py b/python/lsst/daf/butler/registry/tests/_registry.py index 974988acad..28e561cc06 100644 --- a/python/lsst/daf/butler/registry/tests/_registry.py +++ b/python/lsst/daf/butler/registry/tests/_registry.py @@ -723,7 +723,7 @@ def testCollections(self): """Tests for registry methods that manage collections.""" butler = self.make_butler() registry = butler.registry - other_registry = butler._clone().registry + other_registry = butler.clone().registry self.load_data(butler, "base.yaml", "datasets.yaml") run1 = "imported_g" run2 = "imported_r" @@ -975,7 +975,7 @@ def _do_collection_concurrency_test( # Set up two registries pointing to the same DB butler1 = self.make_butler() - butler2 = butler1._clone() + butler2 = butler1.clone() registry1 = butler1._registry assert isinstance(registry1, SqlRegistry) registry2 = butler2._registry diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 1e752c23a5..6bd40571b0 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -591,7 +591,7 @@ def _normalize_collections(self, collections: CollectionArgType | None) -> Colle collections = self.collections return convert_collection_arg_to_glob_string_list(collections) - def _clone( + def clone( self, *, collections: CollectionArgType | None | EllipsisType = ..., diff --git a/python/lsst/daf/butler/tests/hybrid_butler.py b/python/lsst/daf/butler/tests/hybrid_butler.py index 652b519e35..ebe8824015 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler.py +++ b/python/lsst/daf/butler/tests/hybrid_butler.py @@ -329,7 +329,7 @@ def registry(self) -> Registry: def query(self) -> AbstractContextManager[Query]: return self._remote_butler.query() - def _clone( + def clone( self, *, collections: CollectionArgType | None | EllipsisType = ..., @@ -337,10 +337,10 @@ def _clone( inferDefaults: bool | EllipsisType = ..., dataId: dict[str, str] | EllipsisType = ..., ) -> HybridButler: - remote_butler = self._remote_butler._clone( + remote_butler = self._remote_butler.clone( collections=collections, run=run, inferDefaults=inferDefaults, dataId=dataId ) - direct_butler = self._direct_butler._clone( + direct_butler = self._direct_butler.clone( collections=collections, run=run, inferDefaults=inferDefaults, dataId=dataId ) return HybridButler(remote_butler, direct_butler) diff --git a/python/lsst/daf/butler/tests/utils.py b/python/lsst/daf/butler/tests/utils.py index ffe62d4fe0..f03b6ad565 100644 --- a/python/lsst/daf/butler/tests/utils.py +++ b/python/lsst/daf/butler/tests/utils.py @@ -316,7 +316,7 @@ def create_from_butler( instance. """ self = cls.__new__(cls) - butler = butler._clone(run=self._DEFAULT_RUN, collections=[self._DEFAULT_TAG]) + butler = butler.clone(run=self._DEFAULT_RUN, collections=[self._DEFAULT_TAG]) self._do_init(butler, butler_config_file, storageClassName) return self diff --git a/tests/test_butler.py b/tests/test_butler.py index 8ef9e4b7bc..e08a01fb35 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -2113,7 +2113,7 @@ def create_butler( self, run: str, storageClass: StorageClass | str, datasetTypeName: str ) -> tuple[DirectButler, DatasetType]: butler, datasetType = super().create_butler(run, storageClass, datasetTypeName) - return butler._clone(run=run), datasetType + return butler.clone(run=run), datasetType class InMemoryDatastoreButlerTestCase(ButlerTests, unittest.TestCase): @@ -2138,7 +2138,7 @@ def create_butler( self, run: str, storageClass: StorageClass | str, datasetTypeName: str ) -> tuple[DirectButler, DatasetType]: butler, datasetType = super().create_butler(run, storageClass, datasetTypeName) - return butler._clone(run=run), datasetType + return butler.clone(run=run), datasetType class ChainedDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): @@ -2838,7 +2838,7 @@ def are_uris_equivalent(self, uri1: ResourcePath, uri2: ResourcePath) -> bool: return uri1.scheme == uri2.scheme and uri1.netloc == uri2.netloc and uri1.path == uri2.path def create_empty_butler(self, run: str | None = None, writeable: bool | None = None) -> Butler: - return self.server_instance.hybrid_butler._clone(run=run) + return self.server_instance.hybrid_butler.clone(run=run) def remove_dataset_out_of_band(self, butler: Butler, ref: DatasetRef) -> None: # Can't delete a file via S3 signed URLs, so we need to reach in diff --git a/tests/test_server.py b/tests/test_server.py index be7d0cd81c..96fd7de6b1 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -268,7 +268,7 @@ def test_get(self): ) # Test get() by DataId with default collections. - butler_with_default_collection = self.butler._clone(collections="ingest/run") + butler_with_default_collection = self.butler.clone(collections="ingest/run") default_collection_metric = butler_with_default_collection.get(dataset_type, dataId=data_id) self.assertEqual(metric, default_collection_metric) diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py index 261d1d9221..68d1a484e3 100644 --- a/tests/test_simpleButler.py +++ b/tests/test_simpleButler.py @@ -771,6 +771,7 @@ def test_clone(self): butler = self.makeButler(writeable=True) butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "datasets.yaml")) + butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "spatial.yaml")) # Original butler was created with the default arguments: # collections = None @@ -778,26 +779,47 @@ def test_clone(self): # inferDefaults = True # no explicit default data ID - clone1 = butler._clone(collections="imported_g") + # Collections can be overridden, and default data ID will be inferred + # from it. + clone1 = butler.clone(collections="imported_g") self.assertEqual(clone1.registry.defaults.dataId, {"instrument": "Cam1"}) self.assertCountEqual(clone1.registry.defaults.collections, ["imported_g"]) self.assertIsNone(clone1.run) - clone2 = clone1._clone(inferDefaults=False) + # Disabling inferDefaults stops default data ID from being inferred + # from collections. + clone2 = clone1.clone(inferDefaults=False) self.assertEqual(clone2.registry.defaults.dataId, {}) self.assertCountEqual(clone2.registry.defaults.collections, ["imported_g"]) self.assertIsNone(clone2.run) - clone3 = clone2._clone(run="imported_r") + # Setting a new run doesn't override explicitly-set collections. + clone3 = clone2.clone(run="imported_r") self.assertEqual(clone3.registry.defaults.dataId, {}) self.assertCountEqual(clone3.registry.defaults.collections, ["imported_g"]) self.assertEqual(clone3.run, "imported_r") - clone4 = butler._clone(run="imported_r") + # Following the behavior of the Butler() constructor, run will populate + # collections if collections was None. Default data ID is inferred + # from the run collection. + clone4 = butler.clone(run="imported_r") self.assertEqual(clone4.registry.defaults.dataId, {"instrument": "Cam1"}) self.assertCountEqual(clone4.registry.defaults.collections, ["imported_r"]) self.assertEqual(clone4.run, "imported_r") + # Explicitly set data ID is combined with inferred defaults from + # collections. + clone5 = clone4.clone(dataId={"skymap": "SkyMap1"}) + self.assertEqual(clone5.registry.defaults.dataId, {"instrument": "Cam1", "skymap": "SkyMap1"}) + self.assertCountEqual(clone5.registry.defaults.collections, ["imported_r"]) + self.assertEqual(clone5.run, "imported_r") + + # Disabling inferred defaults preserves explicitly set data ID + clone6 = clone5.clone(inferDefaults=False) + self.assertEqual(clone6.registry.defaults.dataId, {"skymap": "SkyMap1"}) + self.assertCountEqual(clone5.registry.defaults.collections, ["imported_r"]) + self.assertEqual(clone5.run, "imported_r") + class DirectSimpleButlerTestCase(SimpleButlerTests, unittest.TestCase): """Run tests against DirectButler implementation.""" diff --git a/tests/test_sqlite.py b/tests/test_sqlite.py index d0ecc3254b..42b44eb397 100644 --- a/tests/test_sqlite.py +++ b/tests/test_sqlite.py @@ -232,7 +232,7 @@ class ClonedSqliteFileRegistryNameKeyCollMgrUUIDTestCase( def make_butler(self) -> Butler: original = super().make_butler() - return original._clone() + return original.clone() class SqliteFileRegistrySynthIntKeyCollMgrUUIDTestCase(SqliteFileRegistryTests, unittest.TestCase):