From 5c148da057be848c8a8a2e2491a85e7a33bd11fd Mon Sep 17 00:00:00 2001 From: Andy Salnikov Date: Thu, 12 Oct 2023 11:41:53 -0700 Subject: [PATCH 1/5] Rename _butler.py to direct_butler.py No other changes on this commit, doing renaming as a separate commit to simplify rebasing when my other ticket is merged. --- python/lsst/daf/butler/{_butler.py => direct_butler.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python/lsst/daf/butler/{_butler.py => direct_butler.py} (100%) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/direct_butler.py similarity index 100% rename from python/lsst/daf/butler/_butler.py rename to python/lsst/daf/butler/direct_butler.py From 80387d77802d4c44c19914a24f6cf8c8d03a2be9 Mon Sep 17 00:00:00 2001 From: Andy Salnikov Date: Thu, 12 Oct 2023 11:42:55 -0700 Subject: [PATCH 2/5] Make Butler an abstract class, with DirectButler as default implementation (DM-41116) `Butler` class has been renamed to `DirectButler`, and a new intermediate abstract base class `Butler` is introduced to define full butler interface (on top of `LimitedButler`). Butlers are now instantiated using a factory method `Butler.from_config`, call to `Butler` constructor will also return an instance of a concrete butler class, just as `Butler.from_config`, but `Butler(...)` will cause `mypy` complaints about an abstract class being instantiated. --- python/lsst/daf/butler/_butler.py | 1069 +++++++++++++++++ python/lsst/daf/butler/_quantum_backed.py | 2 +- python/lsst/daf/butler/_registry_shim.py | 6 +- python/lsst/daf/butler/direct_butler.py | 552 +-------- python/lsst/daf/butler/script/_associate.py | 2 +- .../lsst/daf/butler/script/_pruneDatasets.py | 4 +- python/lsst/daf/butler/script/butlerImport.py | 2 +- .../daf/butler/script/certifyCalibrations.py | 2 +- .../lsst/daf/butler/script/collectionChain.py | 2 +- .../lsst/daf/butler/script/configValidate.py | 2 +- python/lsst/daf/butler/script/exportCalibs.py | 2 +- python/lsst/daf/butler/script/ingest_files.py | 2 +- .../daf/butler/script/queryCollections.py | 6 +- python/lsst/daf/butler/script/queryDataIds.py | 5 +- .../daf/butler/script/queryDatasetTypes.py | 2 +- .../lsst/daf/butler/script/queryDatasets.py | 2 +- .../butler/script/queryDimensionRecords.py | 2 +- .../butler/script/register_dataset_type.py | 2 +- .../daf/butler/script/removeCollections.py | 4 +- .../daf/butler/script/removeDatasetType.py | 2 +- python/lsst/daf/butler/script/removeRuns.py | 4 +- .../daf/butler/script/retrieveArtifacts.py | 2 +- .../daf/butler/script/transferDatasets.py | 4 +- python/lsst/daf/butler/server.py | 8 +- python/lsst/daf/butler/tests/_testRepo.py | 4 +- python/lsst/daf/butler/tests/utils.py | 2 +- tests/test_butler.py | 87 +- tests/test_cliCmdIngestFiles.py | 2 +- tests/test_cliCmdPruneDatasets.py | 4 +- tests/test_cliCmdQueryCollections.py | 4 +- tests/test_cliCmdQueryDataIds.py | 2 +- tests/test_cliCmdQueryDimensionRecords.py | 4 +- tests/test_cliCmdRemoveCollections.py | 2 +- tests/test_logFormatter.py | 2 +- tests/test_matplotlibFormatter.py | 2 +- tests/test_packages.py | 2 +- tests/test_parquet.py | 24 +- tests/test_quantumBackedButler.py | 5 +- tests/test_simpleButler.py | 6 +- tests/test_testRepo.py | 2 +- 40 files changed, 1211 insertions(+), 633 deletions(-) create mode 100644 python/lsst/daf/butler/_butler.py diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py new file mode 100644 index 0000000000..7510aa65b7 --- /dev/null +++ b/python/lsst/daf/butler/_butler.py @@ -0,0 +1,1069 
@@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This software is dual licensed under the GNU General Public License and also +# under a 3-clause BSD license. Recipients may choose which of these licenses +# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, +# respectively. If you choose the GPL option then the following text applies +# (but note that there is still no warranty even if you opt for BSD instead): +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +__all__ = ["Butler"] + +from abc import abstractmethod +from collections.abc import Collection, Iterable, Sequence +from contextlib import AbstractContextManager +from typing import Any, TextIO + +from lsst.resources import ResourcePath, ResourcePathExpression +from lsst.utils import doImportType +from lsst.utils.logging import getLogger + +from ._butler_config import ButlerConfig +from ._butler_repo_index import ButlerRepoIndex +from ._config import Config, ConfigSubset +from ._dataset_existence import DatasetExistence +from ._dataset_ref import DatasetIdGenEnum, DatasetRef +from ._dataset_type import DatasetType +from ._deferredDatasetHandle import DeferredDatasetHandle +from ._file_dataset import FileDataset +from ._limited_butler import LimitedButler +from ._storage_class import StorageClass +from .datastore import DatasetRefURIs, Datastore +from .dimensions import DataId, DimensionConfig +from .registry import Registry, RegistryConfig, _RegistryFactory +from .repo_relocation import BUTLER_ROOT_TAG +from .transfers import RepoExportContext + +log = getLogger(__name__) + + +class Butler(LimitedButler): + """Interface for data butler and factory for Butler instances. + + Parameters + ---------- + config : `ButlerConfig`, `Config` or `str`, optional. + Configuration. Anything acceptable to the `ButlerConfig` constructor. + If a directory path is given the configuration will be read from a + ``butler.yaml`` file in that location. If `None` is given default + values will be used. If ``config`` contains "cls" key then its value is + used as a name of butler class and it must be a sub-class of this + class, otherwise `DirectButler` is instantiated. + **kwargs : `Any` + Optional keyword arguments passed to a constructor of actual butler + class. + + Notes + ----- + The preferred way to instantiate Butler is via the `from_config` method. + The call to ``Butler(...)`` is equivalent to ``Butler.from_config(...)``, + but ``mypy`` will complain about the former. 
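A minimal sketch of the factory behaviour described in these notes (the repository path and collection name are placeholders, and a default configuration without a "cls" override is assumed)::

    from lsst.daf.butler import Butler

    # The factory inspects the configuration, resolves the concrete butler
    # class (DirectButler unless the config has a "cls" entry naming another
    # subclass of Butler) and returns an instance of that class.
    butler = Butler.from_config("/path/to/repo", collections=["my_collection"])

    # Equivalent at runtime via __new__, but mypy reports instantiation of an
    # abstract class, which is why from_config is the preferred spelling.
    butler = Butler("/path/to/repo", collections=["my_collection"])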
+ """ + + def __new__(cls, config: Config | ResourcePathExpression | None = None, **kwargs: Any) -> Butler: + if cls is Butler: + cls = cls._find_butler_class(config, **kwargs) + # Note: we do not pass any parameters to __new__, Python will pass them + # to __init__ after __new__ returns sub-class instance. + return super().__new__(cls) + + @staticmethod + def _find_butler_class( + config: Config | ResourcePathExpression | None = None, **kwargs: Any + ) -> type[Butler]: + """Find actual class to instantiate.""" + butler_class_name: str | None = None + if config is not None: + # Check for optional "cls" key in config. + if not isinstance(config, Config): + config = ButlerConfig(config, searchPaths=kwargs.get("searchPaths")) + butler_class_name = config.get("cls") + + # Make DirectButler if class is not specified. + butler_class: type[Butler] + if butler_class_name is None: + from .direct_butler import DirectButler + + butler_class = DirectButler + else: + butler_class = doImportType(butler_class_name) + if not issubclass(butler_class, Butler): + raise TypeError(f"{butler_class_name} is not a subclass of Butler") + return butler_class + + @classmethod + def from_config(cls, config: Config | ResourcePathExpression | None = None, **kwargs: Any) -> Butler: + """Create butler instance from configuration. + + Parameters + ---------- + config : `ButlerConfig`, `Config` or `str`, optional. + Configuration. Anything acceptable to the `ButlerConfig` + constructor. If a directory path is given the configuration will be + read from a ``butler.yaml`` file in that location. If `None` is + given default values will be used. If ``config`` contains "cls" key + then its value is used as a name of butler class and it must be a + sub-class of this class, otherwise `DirectButler` is instantiated. + **kwargs : `Any` + Optional keyword arguments passed to a constructor of actual butler + class. + + Notes + ----- + Calling this factory method is identical to calling + ``Butler(config, ...)``. Its only raison d'ĂȘtre is that ``mypy`` + complains about ``Butler()`` call. + """ + cls = cls._find_butler_class(config, **kwargs) + return cls(config, **kwargs) + + @staticmethod + def makeRepo( + root: ResourcePathExpression, + config: Config | str | None = None, + dimensionConfig: Config | str | None = None, + standalone: bool = False, + searchPaths: list[str] | None = None, + forceConfigRoot: bool = True, + outfile: ResourcePathExpression | None = None, + overwrite: bool = False, + ) -> Config: + """Create an empty data repository by adding a butler.yaml config + to a repository root directory. + + Parameters + ---------- + root : `lsst.resources.ResourcePathExpression` + Path or URI to the root location of the new repository. Will be + created if it does not exist. + config : `Config` or `str`, optional + Configuration to write to the repository, after setting any + root-dependent Registry or Datastore config options. Can not + be a `ButlerConfig` or a `ConfigSubset`. If `None`, default + configuration will be used. Root-dependent config options + specified in this config are overwritten if ``forceConfigRoot`` + is `True`. + dimensionConfig : `Config` or `str`, optional + Configuration for dimensions, will be used to initialize registry + database. + standalone : `bool` + If True, write all expanded defaults, not just customized or + repository-specific settings. 
+ This (mostly) decouples the repository from the default + configuration, insulating it from changes to the defaults (which + may be good or bad, depending on the nature of the changes). + Future *additions* to the defaults will still be picked up when + initializing `Butlers` to repos created with ``standalone=True``. + searchPaths : `list` of `str`, optional + Directory paths to search when calculating the full butler + configuration. + forceConfigRoot : `bool`, optional + If `False`, any values present in the supplied ``config`` that + would normally be reset are not overridden and will appear + directly in the output config. This allows non-standard overrides + of the root directory for a datastore or registry to be given. + If this parameter is `True` the values for ``root`` will be + forced into the resulting config if appropriate. + outfile : `lss.resources.ResourcePathExpression`, optional + If not-`None`, the output configuration will be written to this + location rather than into the repository itself. Can be a URI + string. Can refer to a directory that will be used to write + ``butler.yaml``. + overwrite : `bool`, optional + Create a new configuration file even if one already exists + in the specified output location. Default is to raise + an exception. + + Returns + ------- + config : `Config` + The updated `Config` instance written to the repo. + + Raises + ------ + ValueError + Raised if a ButlerConfig or ConfigSubset is passed instead of a + regular Config (as these subclasses would make it impossible to + support ``standalone=False``). + FileExistsError + Raised if the output config file already exists. + os.error + Raised if the directory does not exist, exists but is not a + directory, or cannot be created. + + Notes + ----- + Note that when ``standalone=False`` (the default), the configuration + search path (see `ConfigSubset.defaultSearchPaths`) that was used to + construct the repository should also be used to construct any Butlers + to avoid configuration inconsistencies. 
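A hedged sketch of creating and then opening a repository with ``makeRepo`` (the path is a placeholder and the default configuration is assumed)::

    from lsst.daf.butler import Butler

    # Writes a butler.yaml with root-dependent registry/datastore settings
    # into the given root and creates the registry database there.
    Butler.makeRepo("/path/to/new_repo")

    # The new repository is then opened through the factory method.
    butler = Butler.from_config("/path/to/new_repo", writeable=True)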
+ """ + if isinstance(config, ButlerConfig | ConfigSubset): + raise ValueError("makeRepo must be passed a regular Config without defaults applied.") + + # Ensure that the root of the repository exists or can be made + root_uri = ResourcePath(root, forceDirectory=True) + root_uri.mkdir() + + config = Config(config) + + # If we are creating a new repo from scratch with relative roots, + # do not propagate an explicit root from the config file + if "root" in config: + del config["root"] + + full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults + imported_class = doImportType(full["datastore", "cls"]) + if not issubclass(imported_class, Datastore): + raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") + datastoreClass: type[Datastore] = imported_class + datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) + + # if key exists in given config, parse it, otherwise parse the defaults + # in the expanded config + if config.get(("registry", "db")): + registryConfig = RegistryConfig(config) + else: + registryConfig = RegistryConfig(full) + defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) + if defaultDatabaseUri is not None: + Config.updateParameters( + RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot + ) + else: + Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) + + if standalone: + config.merge(full) + else: + # Always expand the registry.managers section into the per-repo + # config, because after the database schema is created, it's not + # allowed to change anymore. Note that in the standalone=True + # branch, _everything_ in the config is expanded, so there's no + # need to special case this. + Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) + configURI: ResourcePathExpression + if outfile is not None: + # When writing to a separate location we must include + # the root of the butler repo in the config else it won't know + # where to look. + config["root"] = root_uri.geturl() + configURI = outfile + else: + configURI = root_uri + # Strip obscore configuration, if it is present, before writing config + # to a file, obscore config will be stored in registry. + if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: + config_to_write = config.copy() + del config_to_write[obscore_config_key] + config_to_write.dumpToUri(configURI, overwrite=overwrite) + # configFile attribute is updated, need to copy it to original. + config.configFile = config_to_write.configFile + else: + config.dumpToUri(configURI, overwrite=overwrite) + + # Create Registry and populate tables + registryConfig = RegistryConfig(config.get("registry")) + dimensionConfig = DimensionConfig(dimensionConfig) + _RegistryFactory(registryConfig).create_from_config( + dimensionConfig=dimensionConfig, butlerRoot=root_uri + ) + + log.verbose("Wrote new Butler configuration file to %s", configURI) + + return config + + @classmethod + def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: + """Look up the label in a butler repository index. + + Parameters + ---------- + label : `str` + Label of the Butler repository to look up. 
+ return_label : `bool`, optional + If ``label`` cannot be found in the repository index (either + because index is not defined or ``label`` is not in the index) and + ``return_label`` is `True` then return ``ResourcePath(label)``. + If ``return_label`` is `False` (default) then an exception will be + raised instead. + + Returns + ------- + uri : `lsst.resources.ResourcePath` + URI to the Butler repository associated with the given label or + default value if it is provided. + + Raises + ------ + KeyError + Raised if the label is not found in the index, or if an index + is not defined, and ``return_label`` is `False`. + + Notes + ----- + See `~lsst.daf.butler.ButlerRepoIndex` for details on how the + information is discovered. + """ + return ButlerRepoIndex.get_repo_uri(label, return_label) + + @classmethod + def get_known_repos(cls) -> set[str]: + """Retrieve the list of known repository labels. + + Returns + ------- + repos : `set` of `str` + All the known labels. Can be empty if no index can be found. + + Notes + ----- + See `~lsst.daf.butler.ButlerRepoIndex` for details on how the + information is discovered. + """ + return ButlerRepoIndex.get_known_repos() + + @abstractmethod + def transaction(self) -> AbstractContextManager[None]: + """Context manager supporting `Butler` transactions. + + Transactions can be nested. + """ + raise NotImplementedError() + + @abstractmethod + def put( + self, + obj: Any, + datasetRefOrType: DatasetRef | DatasetType | str, + /, + dataId: DataId | None = None, + *, + run: str | None = None, + **kwargs: Any, + ) -> DatasetRef: + """Store and register a dataset. + + Parameters + ---------- + obj : `object` + The dataset. + datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` + When `DatasetRef` is provided, ``dataId`` should be `None`. + Otherwise the `DatasetType` or name thereof. If a fully resolved + `DatasetRef` is given the run and ID are used directly. + dataId : `dict` or `DataCoordinate` + A `dict` of `Dimension` link name, value pairs that label the + `DatasetRef` within a Collection. When `None`, a `DatasetRef` + should be provided as the second argument. + run : `str`, optional + The name of the run the dataset should be added to, overriding + ``self.run``. Not used if a resolved `DatasetRef` is provided. + **kwargs + Additional keyword arguments used to augment or construct a + `DataCoordinate`. See `DataCoordinate.standardize` + parameters. Not used if a resolve `DatasetRef` is provided. + + Returns + ------- + ref : `DatasetRef` + A reference to the stored dataset, updated with the correct id if + given. + + Raises + ------ + TypeError + Raised if the butler is read-only or if no run has been provided. + """ + raise NotImplementedError() + + @abstractmethod + def getDeferred( + self, + datasetRefOrType: DatasetRef | DatasetType | str, + /, + dataId: DataId | None = None, + *, + parameters: dict | None = None, + collections: Any = None, + storageClass: str | StorageClass | None = None, + **kwargs: Any, + ) -> DeferredDatasetHandle: + """Create a `DeferredDatasetHandle` which can later retrieve a dataset, + after an immediate registry lookup. + + Parameters + ---------- + datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` + When `DatasetRef` the `dataId` should be `None`. + Otherwise the `DatasetType` or name thereof. + dataId : `dict` or `DataCoordinate`, optional + A `dict` of `Dimension` link name, value pairs that label the + `DatasetRef` within a Collection. When `None`, a `DatasetRef` + should be provided as the first argument. 
+ parameters : `dict` + Additional StorageClass-defined options to control reading, + typically used to efficiently read only a subset of the dataset. + collections : Any, optional + Collections to be searched, overriding ``self.collections``. + Can be any of the types supported by the ``collections`` argument + to butler construction. + storageClass : `StorageClass` or `str`, optional + The storage class to be used to override the Python type + returned by this method. By default the returned type matches + the dataset type definition for this dataset. Specifying a + read `StorageClass` can force a different type to be returned. + This type must be compatible with the original type. + **kwargs + Additional keyword arguments used to augment or construct a + `DataId`. See `DataId` parameters. + + Returns + ------- + obj : `DeferredDatasetHandle` + A handle which can be used to retrieve a dataset at a later time. + + Raises + ------ + LookupError + Raised if no matching dataset exists in the `Registry` or + datastore. + ValueError + Raised if a resolved `DatasetRef` was passed as an input, but it + differs from the one found in the registry. + TypeError + Raised if no collections were provided. + """ + raise NotImplementedError() + + @abstractmethod + def get( + self, + datasetRefOrType: DatasetRef | DatasetType | str, + /, + dataId: DataId | None = None, + *, + parameters: dict[str, Any] | None = None, + collections: Any = None, + storageClass: StorageClass | str | None = None, + **kwargs: Any, + ) -> Any: + """Retrieve a stored dataset. + + Parameters + ---------- + datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` + When `DatasetRef` the `dataId` should be `None`. + Otherwise the `DatasetType` or name thereof. + If a resolved `DatasetRef`, the associated dataset + is returned directly without additional querying. + dataId : `dict` or `DataCoordinate` + A `dict` of `Dimension` link name, value pairs that label the + `DatasetRef` within a Collection. When `None`, a `DatasetRef` + should be provided as the first argument. + parameters : `dict` + Additional StorageClass-defined options to control reading, + typically used to efficiently read only a subset of the dataset. + collections : Any, optional + Collections to be searched, overriding ``self.collections``. + Can be any of the types supported by the ``collections`` argument + to butler construction. + storageClass : `StorageClass` or `str`, optional + The storage class to be used to override the Python type + returned by this method. By default the returned type matches + the dataset type definition for this dataset. Specifying a + read `StorageClass` can force a different type to be returned. + This type must be compatible with the original type. + **kwargs + Additional keyword arguments used to augment or construct a + `DataCoordinate`. See `DataCoordinate.standardize` + parameters. + + Returns + ------- + obj : `object` + The dataset. + + Raises + ------ + LookupError + Raised if no matching dataset exists in the `Registry`. + TypeError + Raised if no collections were provided. + + Notes + ----- + When looking up datasets in a `~CollectionType.CALIBRATION` collection, + this method requires that the given data ID include temporal dimensions + beyond the dimensions of the dataset type itself, in order to find the + dataset with the appropriate validity range. 
For example, a "bias" + dataset with native dimensions ``{instrument, detector}`` could be + fetched with a ``{instrument, detector, exposure}`` data ID, because + ``exposure`` is a temporal dimension. + """ + raise NotImplementedError() + + @abstractmethod + def getURIs( + self, + datasetRefOrType: DatasetRef | DatasetType | str, + /, + dataId: DataId | None = None, + *, + predict: bool = False, + collections: Any = None, + run: str | None = None, + **kwargs: Any, + ) -> DatasetRefURIs: + """Return the URIs associated with the dataset. + + Parameters + ---------- + datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` + When `DatasetRef` the `dataId` should be `None`. + Otherwise the `DatasetType` or name thereof. + dataId : `dict` or `DataCoordinate` + A `dict` of `Dimension` link name, value pairs that label the + `DatasetRef` within a Collection. When `None`, a `DatasetRef` + should be provided as the first argument. + predict : `bool` + If `True`, allow URIs to be returned of datasets that have not + been written. + collections : Any, optional + Collections to be searched, overriding ``self.collections``. + Can be any of the types supported by the ``collections`` argument + to butler construction. + run : `str`, optional + Run to use for predictions, overriding ``self.run``. + **kwargs + Additional keyword arguments used to augment or construct a + `DataCoordinate`. See `DataCoordinate.standardize` + parameters. + + Returns + ------- + uris : `DatasetRefURIs` + The URI to the primary artifact associated with this dataset (if + the dataset was disassembled within the datastore this may be + `None`), and the URIs to any components associated with the dataset + artifact. (can be empty if there are no components). + """ + raise NotImplementedError() + + @abstractmethod + def getURI( + self, + datasetRefOrType: DatasetRef | DatasetType | str, + /, + dataId: DataId | None = None, + *, + predict: bool = False, + collections: Any = None, + run: str | None = None, + **kwargs: Any, + ) -> ResourcePath: + """Return the URI to the Dataset. + + Parameters + ---------- + datasetRefOrType : `DatasetRef`, `DatasetType`, or `str` + When `DatasetRef` the `dataId` should be `None`. + Otherwise the `DatasetType` or name thereof. + dataId : `dict` or `DataCoordinate` + A `dict` of `Dimension` link name, value pairs that label the + `DatasetRef` within a Collection. When `None`, a `DatasetRef` + should be provided as the first argument. + predict : `bool` + If `True`, allow URIs to be returned of datasets that have not + been written. + collections : Any, optional + Collections to be searched, overriding ``self.collections``. + Can be any of the types supported by the ``collections`` argument + to butler construction. + run : `str`, optional + Run to use for predictions, overriding ``self.run``. + **kwargs + Additional keyword arguments used to augment or construct a + `DataCoordinate`. See `DataCoordinate.standardize` + parameters. + + Returns + ------- + uri : `lsst.resources.ResourcePath` + URI pointing to the Dataset within the datastore. If the + Dataset does not exist in the datastore, and if ``predict`` is + `True`, the URI will be a prediction and will include a URI + fragment "#predicted". + If the datastore does not have entities that relate well + to the concept of a URI the returned URI string will be + descriptive. The returned URI is not guaranteed to be obtainable. + + Raises + ------ + LookupError + A URI has been requested for a dataset that does not exist and + guessing is not allowed. 
+ ValueError + Raised if a resolved `DatasetRef` was passed as an input, but it + differs from the one found in the registry. + TypeError + Raised if no collections were provided. + RuntimeError + Raised if a URI is requested for a dataset that consists of + multiple artifacts. + """ + raise NotImplementedError() + + @abstractmethod + def retrieveArtifacts( + self, + refs: Iterable[DatasetRef], + destination: ResourcePathExpression, + transfer: str = "auto", + preserve_path: bool = True, + overwrite: bool = False, + ) -> list[ResourcePath]: + """Retrieve the artifacts associated with the supplied refs. + + Parameters + ---------- + refs : iterable of `DatasetRef` + The datasets for which artifacts are to be retrieved. + A single ref can result in multiple artifacts. The refs must + be resolved. + destination : `lsst.resources.ResourcePath` or `str` + Location to write the artifacts. + transfer : `str`, optional + Method to use to transfer the artifacts. Must be one of the options + supported by `~lsst.resources.ResourcePath.transfer_from()`. + "move" is not allowed. + preserve_path : `bool`, optional + If `True` the full path of the artifact within the datastore + is preserved. If `False` the final file component of the path + is used. + overwrite : `bool`, optional + If `True` allow transfers to overwrite existing files at the + destination. + + Returns + ------- + targets : `list` of `lsst.resources.ResourcePath` + URIs of file artifacts in destination location. Order is not + preserved. + + Notes + ----- + For non-file datastores the artifacts written to the destination + may not match the representation inside the datastore. For example + a hierarchical data structure in a NoSQL database may well be stored + as a JSON file. + """ + raise NotImplementedError() + + @abstractmethod + def exists( + self, + dataset_ref_or_type: DatasetRef | DatasetType | str, + /, + data_id: DataId | None = None, + *, + full_check: bool = True, + collections: Any = None, + **kwargs: Any, + ) -> DatasetExistence: + """Indicate whether a dataset is known to Butler registry and + datastore. + + Parameters + ---------- + dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` + When `DatasetRef` the `dataId` should be `None`. + Otherwise the `DatasetType` or name thereof. + data_id : `dict` or `DataCoordinate` + A `dict` of `Dimension` link name, value pairs that label the + `DatasetRef` within a Collection. When `None`, a `DatasetRef` + should be provided as the first argument. + full_check : `bool`, optional + If `True`, an additional check will be made for dataset artifact + existence. This will involve additional overhead due to the need + to query an external system. If `False` registry and datastore + will solely be asked if they know about the dataset but no + check for the artifact will be performed. + collections : Any, optional + Collections to be searched, overriding ``self.collections``. + Can be any of the types supported by the ``collections`` argument + to butler construction. + **kwargs + Additional keyword arguments used to augment or construct a + `DataCoordinate`. See `DataCoordinate.standardize` + parameters. + + Returns + ------- + existence : `DatasetExistence` + Object indicating whether the dataset is known to registry and + datastore. Evaluates to `True` if the dataset is present and known + to both. 
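A short, hypothetical sketch of the existence check used together with ``get`` (repository path, dataset type name and data ID values are invented, and the dataset type is assumed to be registered)::

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo", collections=["my_run"])
    data_id = {"instrument": "MyCam", "detector": 1, "exposure": 42}

    # DatasetExistence evaluates to True only when both registry and
    # datastore know the dataset (and, with full_check=True, the artifact
    # itself was verified to exist).
    if butler.exists("raw_image", data_id, full_check=True):
        image = butler.get("raw_image", data_id)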
+ """ + raise NotImplementedError() + + @abstractmethod + def _exists_many( + self, + refs: Iterable[DatasetRef], + /, + *, + full_check: bool = True, + ) -> dict[DatasetRef, DatasetExistence]: + """Indicate whether multiple datasets are known to Butler registry and + datastore. + + This is an experimental API that may change at any moment. + + Parameters + ---------- + refs : iterable of `DatasetRef` + The datasets to be checked. + full_check : `bool`, optional + If `True`, an additional check will be made for dataset artifact + existence. This will involve additional overhead due to the need + to query an external system. If `False` registry and datastore + will solely be asked if they know about the dataset but no + check for the artifact will be performed. + + Returns + ------- + existence : dict of [`DatasetRef`, `DatasetExistence`] + Mapping from the given dataset refs to an enum indicating the + status of the dataset in registry and datastore. + Each value evaluates to `True` if the dataset is present and known + to both. + """ + raise NotImplementedError() + + @abstractmethod + def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: + """Remove one or more `~CollectionType.RUN` collections and the + datasets within them. + + Parameters + ---------- + names : `~collections.abc.Iterable` [ `str` ] + The names of the collections to remove. + unstore : `bool`, optional + If `True` (default), delete datasets from all datastores in which + they are present, and attempt to rollback the registry deletions if + datastore deletions fail (which may not always be possible). If + `False`, datastore records for these datasets are still removed, + but any artifacts (e.g. files) will not be. + + Raises + ------ + TypeError + Raised if one or more collections are not of type + `~CollectionType.RUN`. + """ + raise NotImplementedError() + + @abstractmethod + def ingest( + self, + *datasets: FileDataset, + transfer: str | None = "auto", + run: str | None = None, + idGenerationMode: DatasetIdGenEnum | None = None, + record_validation_info: bool = True, + ) -> None: + """Store and register one or more datasets that already exist on disk. + + Parameters + ---------- + datasets : `FileDataset` + Each positional argument is a struct containing information about + a file to be ingested, including its URI (either absolute or + relative to the datastore root, if applicable), a resolved + `DatasetRef`, and optionally a formatter class or its + fully-qualified string name. If a formatter is not provided, the + formatter that would be used for `put` is assumed. On successful + ingest all `FileDataset.formatter` attributes will be set to the + formatter class used. `FileDataset.path` attributes may be modified + to put paths in whatever the datastore considers a standardized + form. + transfer : `str`, optional + If not `None`, must be one of 'auto', 'move', 'copy', 'direct', + 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to + transfer the file. + run : `str`, optional + The name of the run ingested datasets should be added to, + overriding ``self.run``. This parameter is now deprecated since + the run is encoded in the ``FileDataset``. + idGenerationMode : `DatasetIdGenEnum`, optional + Specifies option for generating dataset IDs. Parameter is + deprecated. + record_validation_info : `bool`, optional + If `True`, the default, the datastore can record validation + information associated with the file. 
If `False` the datastore + will not attempt to track any information such as checksums + or file sizes. This can be useful if such information is tracked + in an external system or if the file is to be compressed in place. + It is up to the datastore whether this parameter is relevant. + + Raises + ------ + TypeError + Raised if the butler is read-only or if no run was provided. + NotImplementedError + Raised if the `Datastore` does not support the given transfer mode. + DatasetTypeNotSupportedError + Raised if one or more files to be ingested have a dataset type that + is not supported by the `Datastore`.. + FileNotFoundError + Raised if one of the given files does not exist. + FileExistsError + Raised if transfer is not `None` but the (internal) location the + file would be moved to is already occupied. + + Notes + ----- + This operation is not fully exception safe: if a database operation + fails, the given `FileDataset` instances may be only partially updated. + + It is atomic in terms of database operations (they will either all + succeed or all fail) providing the database engine implements + transactions correctly. It will attempt to be atomic in terms of + filesystem operations as well, but this cannot be implemented + rigorously for most datastores. + """ + raise NotImplementedError() + + @abstractmethod + def export( + self, + *, + directory: str | None = None, + filename: str | None = None, + format: str | None = None, + transfer: str | None = None, + ) -> AbstractContextManager[RepoExportContext]: + """Export datasets from the repository represented by this `Butler`. + + This method is a context manager that returns a helper object + (`RepoExportContext`) that is used to indicate what information from + the repository should be exported. + + Parameters + ---------- + directory : `str`, optional + Directory dataset files should be written to if ``transfer`` is not + `None`. + filename : `str`, optional + Name for the file that will include database information associated + with the exported datasets. If this is not an absolute path and + ``directory`` is not `None`, it will be written to ``directory`` + instead of the current working directory. Defaults to + "export.{format}". + format : `str`, optional + File format for the database information file. If `None`, the + extension of ``filename`` will be used. + transfer : `str`, optional + Transfer mode passed to `Datastore.export`. + + Raises + ------ + TypeError + Raised if the set of arguments passed is inconsistent. + + Examples + -------- + Typically the `Registry.queryDataIds` and `Registry.queryDatasets` + methods are used to provide the iterables over data IDs and/or datasets + to be exported:: + + with butler.export("exports.yaml") as export: + # Export all flats, but none of the dimension element rows + # (i.e. data ID information) associated with them. + export.saveDatasets(butler.registry.queryDatasets("flat"), + elements=()) + # Export all datasets that start with "deepCoadd_" and all of + # their associated data ID information. 
+ export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) + """ + raise NotImplementedError() + + @abstractmethod + def import_( + self, + *, + directory: ResourcePathExpression | None = None, + filename: ResourcePathExpression | TextIO | None = None, + format: str | None = None, + transfer: str | None = None, + skip_dimensions: set | None = None, + ) -> None: + """Import datasets into this repository that were exported from a + different butler repository via `~lsst.daf.butler.Butler.export`. + + Parameters + ---------- + directory : `~lsst.resources.ResourcePathExpression`, optional + Directory containing dataset files to import from. If `None`, + ``filename`` and all dataset file paths specified therein must + be absolute. + filename : `~lsst.resources.ResourcePathExpression` or `TextIO` + A stream or name of file that contains database information + associated with the exported datasets, typically generated by + `~lsst.daf.butler.Butler.export`. If this a string (name) or + `~lsst.resources.ResourcePath` and is not an absolute path, + it will first be looked for relative to ``directory`` and if not + found there it will be looked for in the current working + directory. Defaults to "export.{format}". + format : `str`, optional + File format for ``filename``. If `None`, the extension of + ``filename`` will be used. + transfer : `str`, optional + Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. + skip_dimensions : `set`, optional + Names of dimensions that should be skipped and not imported. + + Raises + ------ + TypeError + Raised if the set of arguments passed is inconsistent, or if the + butler is read-only. + """ + raise NotImplementedError() + + @abstractmethod + def transfer_from( + self, + source_butler: LimitedButler, + source_refs: Iterable[DatasetRef], + transfer: str = "auto", + skip_missing: bool = True, + register_dataset_types: bool = False, + transfer_dimensions: bool = False, + ) -> Collection[DatasetRef]: + """Transfer datasets to this Butler from a run in another Butler. + + Parameters + ---------- + source_butler : `LimitedButler` + Butler from which the datasets are to be transferred. If data IDs + in ``source_refs`` are not expanded then this has to be a full + `Butler` whose registry will be used to expand data IDs. + source_refs : iterable of `DatasetRef` + Datasets defined in the source butler that should be transferred to + this butler. + transfer : `str`, optional + Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. + skip_missing : `bool` + If `True`, datasets with no datastore artifact associated with + them are not transferred. If `False` a registry entry will be + created even if no datastore record is created (and so will + look equivalent to the dataset being unstored). + register_dataset_types : `bool` + If `True` any missing dataset types are registered. Otherwise + an exception is raised. + transfer_dimensions : `bool`, optional + If `True`, dimension record data associated with the new datasets + will be transferred. + + Returns + ------- + refs : `list` of `DatasetRef` + The refs added to this Butler. + + Notes + ----- + The datastore artifact has to exist for a transfer + to be made but non-existence is not an error. + + Datasets that already exist in this run will be skipped. + + The datasets are imported as part of a transaction, although + dataset types are registered before the transaction is started. + This means that it is possible for a dataset type to be registered + even though transfer has failed. 
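A hedged sketch of a repository-to-repository transfer using the interface above (paths, dataset type and collection names are placeholders)::

    from lsst.daf.butler import Butler

    source = Butler.from_config("/path/to/source_repo")
    target = Butler.from_config("/path/to/target_repo", writeable=True)

    # Resolve some refs in the source and copy both their registry entries
    # and datastore artifacts into the target, registering missing dataset
    # types on the way.
    refs = source.registry.queryDatasets("raw_image", collections=["my_run"])
    target.transfer_from(source, refs, transfer="copy", register_dataset_types=True)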
+ """ + raise NotImplementedError() + + @abstractmethod + def validateConfiguration( + self, + logFailures: bool = False, + datasetTypeNames: Iterable[str] | None = None, + ignore: Iterable[str] | None = None, + ) -> None: + """Validate butler configuration. + + Checks that each `DatasetType` can be stored in the `Datastore`. + + Parameters + ---------- + logFailures : `bool`, optional + If `True`, output a log message for every validation error + detected. + datasetTypeNames : iterable of `str`, optional + The `DatasetType` names that should be checked. This allows + only a subset to be selected. + ignore : iterable of `str`, optional + Names of DatasetTypes to skip over. This can be used to skip + known problems. If a named `DatasetType` corresponds to a + composite, all components of that `DatasetType` will also be + ignored. + + Raises + ------ + ButlerValidationError + Raised if there is some inconsistency with how this Butler + is configured. + """ + raise NotImplementedError() + + @property + @abstractmethod + def collections(self) -> Sequence[str]: + """The collections to search by default, in order + (`~collections.abc.Sequence` [ `str` ]). + """ + raise NotImplementedError() + + @property + @abstractmethod + def run(self) -> str | None: + """Name of the run this butler writes outputs to by default (`str` or + `None`). + """ + raise NotImplementedError() + + @property + @abstractmethod + def registry(self) -> Registry: + """The object that manages dataset metadata and relationships + (`Registry`). + + Many operations that don't involve reading or writing butler datasets + are accessible only via `Registry` methods. Eventually these methods + will be replaced by equivalent `Butler` methods. + """ + raise NotImplementedError() diff --git a/python/lsst/daf/butler/_quantum_backed.py b/python/lsst/daf/butler/_quantum_backed.py index 5d3edb797d..fd33107e23 100644 --- a/python/lsst/daf/butler/_quantum_backed.py +++ b/python/lsst/daf/butler/_quantum_backed.py @@ -661,7 +661,7 @@ class QuantumProvenanceData(_BaseModelCompat): def collect_and_transfer( butler: Butler, quanta: Iterable[Quantum], provenance: Iterable[QuantumProvenanceData] ) -> None: - """Transfer output datasets from multiple quanta to a more permantent + """Transfer output datasets from multiple quanta to a more permanent `Butler` repository. Parameters diff --git a/python/lsst/daf/butler/_registry_shim.py b/python/lsst/daf/butler/_registry_shim.py index 4ba989e829..67f50a16e1 100644 --- a/python/lsst/daf/butler/_registry_shim.py +++ b/python/lsst/daf/butler/_registry_shim.py @@ -54,7 +54,7 @@ from .registry.queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults if TYPE_CHECKING: - from ._butler import Butler + from .direct_butler import DirectButler from .registry._registry import CollectionArgType from .registry.interfaces import ObsCoreTableManager @@ -64,7 +64,7 @@ class RegistryShim(Registry): Parameters ---------- - butler : `Butler` + butler : `DirectButler` Data butler instance. Notes @@ -75,7 +75,7 @@ class RegistryShim(Registry): while we perform re-structuring of Registry and Butler implementations. 
""" - def __init__(self, butler: Butler): + def __init__(self, butler: DirectButler): self._butler = butler self._registry = butler._registry diff --git a/python/lsst/daf/butler/direct_butler.py b/python/lsst/daf/butler/direct_butler.py index a8a3e0a18d..9572d80762 100644 --- a/python/lsst/daf/butler/direct_butler.py +++ b/python/lsst/daf/butler/direct_butler.py @@ -30,7 +30,7 @@ from __future__ import annotations __all__ = ( - "Butler", + "DirectButler", "ButlerValidationError", ) @@ -47,14 +47,13 @@ from deprecated.sphinx import deprecated from lsst.resources import ResourcePath, ResourcePathExpression -from lsst.utils import doImportType from lsst.utils.introspection import get_class_of from lsst.utils.logging import VERBOSE, getLogger from sqlalchemy.exc import IntegrityError +from ._butler import Butler from ._butler_config import ButlerConfig -from ._butler_repo_index import ButlerRepoIndex -from ._config import Config, ConfigSubset +from ._config import Config from ._dataset_existence import DatasetExistence from ._dataset_ref import DatasetIdGenEnum, DatasetRef from ._dataset_type import DatasetType @@ -71,7 +70,6 @@ DataId, DataIdValue, Dimension, - DimensionConfig, DimensionElement, DimensionRecord, DimensionUniverse, @@ -84,12 +82,10 @@ MissingDatasetTypeError, NoDefaultCollectionError, Registry, - RegistryConfig, RegistryDefaults, _ButlerRegistry, _RegistryFactory, ) -from .repo_relocation import BUTLER_ROOT_TAG from .transfers import RepoExportContext from .utils import transactional @@ -107,7 +103,7 @@ class ButlerValidationError(ValidationError): pass -class Butler(LimitedButler): +class DirectButler(Butler): """Main entry point for the data access system. Parameters @@ -117,7 +113,7 @@ class Butler(LimitedButler): `ButlerConfig` constructor. If a directory path is given the configuration will be read from a ``butler.yaml`` file in that location. If `None` is given default values will be used. - butler : `Butler`, optional. + butler : `DirectButler`, optional. If provided, construct a new Butler that uses the same registry and datastore as the given one, but with the given collection and run. Incompatible with the ``config``, ``searchPaths``, and ``writeable`` @@ -202,7 +198,7 @@ def __init__( self, config: Config | ResourcePathExpression | None = None, *, - butler: Butler | None = None, + butler: DirectButler | None = None, collections: Any = None, run: str | None = None, searchPaths: Sequence[ResourcePathExpression] | None = None, @@ -275,212 +271,6 @@ def _retrieve_dataset_type(self, name: str) -> DatasetType | None: except MissingDatasetTypeError: return None - @classmethod - def get_repo_uri(cls, label: str, return_label: bool = False) -> ResourcePath: - """Look up the label in a butler repository index. - - Parameters - ---------- - label : `str` - Label of the Butler repository to look up. - return_label : `bool`, optional - If ``label`` cannot be found in the repository index (either - because index is not defined or ``label`` is not in the index) and - ``return_label`` is `True` then return ``ResourcePath(label)``. - If ``return_label`` is `False` (default) then an exception will be - raised instead. - - Returns - ------- - uri : `lsst.resources.ResourcePath` - URI to the Butler repository associated with the given label or - default value if it is provided. - - Raises - ------ - KeyError - Raised if the label is not found in the index, or if an index - is not defined, and ``return_label`` is `False`. 
- - Notes - ----- - See `~lsst.daf.butler.ButlerRepoIndex` for details on how the - information is discovered. - """ - return ButlerRepoIndex.get_repo_uri(label, return_label) - - @classmethod - def get_known_repos(cls) -> set[str]: - """Retrieve the list of known repository labels. - - Returns - ------- - repos : `set` of `str` - All the known labels. Can be empty if no index can be found. - - Notes - ----- - See `~lsst.daf.butler.ButlerRepoIndex` for details on how the - information is discovered. - """ - return ButlerRepoIndex.get_known_repos() - - @staticmethod - def makeRepo( - root: ResourcePathExpression, - config: Config | str | None = None, - dimensionConfig: Config | str | None = None, - standalone: bool = False, - searchPaths: list[str] | None = None, - forceConfigRoot: bool = True, - outfile: ResourcePathExpression | None = None, - overwrite: bool = False, - ) -> Config: - """Create an empty data repository by adding a butler.yaml config - to a repository root directory. - - Parameters - ---------- - root : `lsst.resources.ResourcePathExpression` - Path or URI to the root location of the new repository. Will be - created if it does not exist. - config : `Config` or `str`, optional - Configuration to write to the repository, after setting any - root-dependent Registry or Datastore config options. Can not - be a `ButlerConfig` or a `ConfigSubset`. If `None`, default - configuration will be used. Root-dependent config options - specified in this config are overwritten if ``forceConfigRoot`` - is `True`. - dimensionConfig : `Config` or `str`, optional - Configuration for dimensions, will be used to initialize registry - database. - standalone : `bool` - If True, write all expanded defaults, not just customized or - repository-specific settings. - This (mostly) decouples the repository from the default - configuration, insulating it from changes to the defaults (which - may be good or bad, depending on the nature of the changes). - Future *additions* to the defaults will still be picked up when - initializing `Butlers` to repos created with ``standalone=True``. - searchPaths : `list` of `str`, optional - Directory paths to search when calculating the full butler - configuration. - forceConfigRoot : `bool`, optional - If `False`, any values present in the supplied ``config`` that - would normally be reset are not overridden and will appear - directly in the output config. This allows non-standard overrides - of the root directory for a datastore or registry to be given. - If this parameter is `True` the values for ``root`` will be - forced into the resulting config if appropriate. - outfile : `lss.resources.ResourcePathExpression`, optional - If not-`None`, the output configuration will be written to this - location rather than into the repository itself. Can be a URI - string. Can refer to a directory that will be used to write - ``butler.yaml``. - overwrite : `bool`, optional - Create a new configuration file even if one already exists - in the specified output location. Default is to raise - an exception. - - Returns - ------- - config : `Config` - The updated `Config` instance written to the repo. - - Raises - ------ - ValueError - Raised if a ButlerConfig or ConfigSubset is passed instead of a - regular Config (as these subclasses would make it impossible to - support ``standalone=False``). - FileExistsError - Raised if the output config file already exists. - os.error - Raised if the directory does not exist, exists but is not a - directory, or cannot be created. 
- - Notes - ----- - Note that when ``standalone=False`` (the default), the configuration - search path (see `ConfigSubset.defaultSearchPaths`) that was used to - construct the repository should also be used to construct any Butlers - to avoid configuration inconsistencies. - """ - if isinstance(config, ButlerConfig | ConfigSubset): - raise ValueError("makeRepo must be passed a regular Config without defaults applied.") - - # Ensure that the root of the repository exists or can be made - root_uri = ResourcePath(root, forceDirectory=True) - root_uri.mkdir() - - config = Config(config) - - # If we are creating a new repo from scratch with relative roots, - # do not propagate an explicit root from the config file - if "root" in config: - del config["root"] - - full = ButlerConfig(config, searchPaths=searchPaths) # this applies defaults - imported_class = doImportType(full["datastore", "cls"]) - if not issubclass(imported_class, Datastore): - raise TypeError(f"Imported datastore class {full['datastore', 'cls']} is not a Datastore") - datastoreClass: type[Datastore] = imported_class - datastoreClass.setConfigRoot(BUTLER_ROOT_TAG, config, full, overwrite=forceConfigRoot) - - # if key exists in given config, parse it, otherwise parse the defaults - # in the expanded config - if config.get(("registry", "db")): - registryConfig = RegistryConfig(config) - else: - registryConfig = RegistryConfig(full) - defaultDatabaseUri = registryConfig.makeDefaultDatabaseUri(BUTLER_ROOT_TAG) - if defaultDatabaseUri is not None: - Config.updateParameters( - RegistryConfig, config, full, toUpdate={"db": defaultDatabaseUri}, overwrite=forceConfigRoot - ) - else: - Config.updateParameters(RegistryConfig, config, full, toCopy=("db",), overwrite=forceConfigRoot) - - if standalone: - config.merge(full) - else: - # Always expand the registry.managers section into the per-repo - # config, because after the database schema is created, it's not - # allowed to change anymore. Note that in the standalone=True - # branch, _everything_ in the config is expanded, so there's no - # need to special case this. - Config.updateParameters(RegistryConfig, config, full, toMerge=("managers",), overwrite=False) - configURI: ResourcePathExpression - if outfile is not None: - # When writing to a separate location we must include - # the root of the butler repo in the config else it won't know - # where to look. - config["root"] = root_uri.geturl() - configURI = outfile - else: - configURI = root_uri - # Strip obscore configuration, if it is present, before writing config - # to a file, obscore config will be stored in registry. - if (obscore_config_key := ("registry", "managers", "obscore", "config")) in config: - config_to_write = config.copy() - del config_to_write[obscore_config_key] - config_to_write.dumpToUri(configURI, overwrite=overwrite) - # configFile attribute is updated, need to copy it to original. 
- config.configFile = config_to_write.configFile - else: - config.dumpToUri(configURI, overwrite=overwrite) - - # Create Registry and populate tables - registryConfig = RegistryConfig(config.get("registry")) - dimensionConfig = DimensionConfig(dimensionConfig) - _RegistryFactory(registryConfig).create_from_config( - dimensionConfig=dimensionConfig, butlerRoot=root_uri - ) - - log.verbose("Wrote new Butler configuration file to %s", configURI) - - return config - @classmethod def _unpickle( cls, @@ -489,7 +279,7 @@ def _unpickle( run: str | None, defaultDataId: dict[str, str], writeable: bool, - ) -> Butler: + ) -> DirectButler: """Callable used to unpickle a Butler. We prefer not to use ``Butler.__init__`` directly so we can force some @@ -529,7 +319,7 @@ def _unpickle( def __reduce__(self) -> tuple: """Support pickling.""" return ( - Butler._unpickle, + DirectButler._unpickle, ( self._config, self.collections, @@ -545,7 +335,7 @@ def __str__(self) -> str: ) def isWriteable(self) -> bool: - """Return `True` if this `Butler` supports write operations.""" + # Docstring inherited. return self._registry.isWriteable() @contextlib.contextmanager @@ -1268,7 +1058,7 @@ def getDirectDeferred( self, ref: DatasetRef, *, - parameters: dict | None = None, + parameters: dict[str, Any] | None = None, storageClass: str | StorageClass | None = None, ) -> DeferredDatasetHandle: """Create a `DeferredDatasetHandle` which can later retrieve a dataset, @@ -1571,41 +1361,7 @@ def retrieveArtifacts( preserve_path: bool = True, overwrite: bool = False, ) -> list[ResourcePath]: - """Retrieve the artifacts associated with the supplied refs. - - Parameters - ---------- - refs : iterable of `DatasetRef` - The datasets for which artifacts are to be retrieved. - A single ref can result in multiple artifacts. The refs must - be resolved. - destination : `lsst.resources.ResourcePath` or `str` - Location to write the artifacts. - transfer : `str`, optional - Method to use to transfer the artifacts. Must be one of the options - supported by `~lsst.resources.ResourcePath.transfer_from()`. - "move" is not allowed. - preserve_path : `bool`, optional - If `True` the full path of the artifact within the datastore - is preserved. If `False` the final file component of the path - is used. - overwrite : `bool`, optional - If `True` allow transfers to overwrite existing files at the - destination. - - Returns - ------- - targets : `list` of `lsst.resources.ResourcePath` - URIs of file artifacts in destination location. Order is not - preserved. - - Notes - ----- - For non-file datastores the artifacts written to the destination - may not match the representation inside the datastore. For example - a hierarchical data structure in a NoSQL database may well be stored - as a JSON file. - """ + # Docstring inherited. return self._datastore.retrieveArtifacts( refs, ResourcePath(destination), @@ -1624,40 +1380,7 @@ def exists( collections: Any = None, **kwargs: Any, ) -> DatasetExistence: - """Indicate whether a dataset is known to Butler registry and - datastore. - - Parameters - ---------- - dataset_ref_or_type : `DatasetRef`, `DatasetType`, or `str` - When `DatasetRef` the `dataId` should be `None`. - Otherwise the `DatasetType` or name thereof. - data_id : `dict` or `DataCoordinate` - A `dict` of `Dimension` link name, value pairs that label the - `DatasetRef` within a Collection. When `None`, a `DatasetRef` - should be provided as the first argument. 
- full_check : `bool`, optional - If `True`, an additional check will be made for dataset artifact - existence. This will involve additional overhead due to the need - to query an external system. If `False` registry and datastore - will solely be asked if they know about the dataset but no - check for the artifact will be performed. - collections : Any, optional - Collections to be searched, overriding ``self.collections``. - Can be any of the types supported by the ``collections`` argument - to butler construction. - **kwargs - Additional keyword arguments used to augment or construct a - `DataCoordinate`. See `DataCoordinate.standardize` - parameters. - - Returns - ------- - existence : `DatasetExistence` - Object indicating whether the dataset is known to registry and - datastore. Evaluates to `True` if the dataset is present and known - to both. - """ + # Docstring inherited. existence = DatasetExistence.UNRECOGNIZED if isinstance(dataset_ref_or_type, DatasetRef): @@ -1708,30 +1431,7 @@ def _exists_many( *, full_check: bool = True, ) -> dict[DatasetRef, DatasetExistence]: - """Indicate whether multiple datasets are known to Butler registry and - datastore. - - This is an experimental API that may change at any moment. - - Parameters - ---------- - refs : iterable of `DatasetRef` - The datasets to be checked. - full_check : `bool`, optional - If `True`, an additional check will be made for dataset artifact - existence. This will involve additional overhead due to the need - to query an external system. If `False` registry and datastore - will solely be asked if they know about the dataset but no - check for the artifact will be performed. - - Returns - ------- - existence : dict of [`DatasetRef`, `DatasetExistence`] - Mapping from the given dataset refs to an enum indicating the - status of the dataset in registry and datastore. - Each value evaluates to `True` if the dataset is present and known - to both. - """ + # Docstring inherited. existence = {ref: DatasetExistence.UNRECOGNIZED for ref in refs} # Registry does not have a bulk API to check for a ref. @@ -1824,26 +1524,7 @@ def datasetExists( return self._datastore.exists(ref) def removeRuns(self, names: Iterable[str], unstore: bool = True) -> None: - """Remove one or more `~CollectionType.RUN` collections and the - datasets within them. - - Parameters - ---------- - names : `~collections.abc.Iterable` [ `str` ] - The names of the collections to remove. - unstore : `bool`, optional - If `True` (default), delete datasets from all datastores in which - they are present, and attempt to rollback the registry deletions if - datastore deletions fail (which may not always be possible). If - `False`, datastore records for these datasets are still removed, - but any artifacts (e.g. files) will not be. - - Raises - ------ - TypeError - Raised if one or more collections are not of type - `~CollectionType.RUN`. - """ + # Docstring inherited. if not self.isWriteable(): raise TypeError("Butler is read-only.") names = list(names) @@ -1938,66 +1619,7 @@ def ingest( idGenerationMode: DatasetIdGenEnum | None = None, record_validation_info: bool = True, ) -> None: - """Store and register one or more datasets that already exist on disk. 
- - Parameters - ---------- - datasets : `FileDataset` - Each positional argument is a struct containing information about - a file to be ingested, including its URI (either absolute or - relative to the datastore root, if applicable), a resolved - `DatasetRef`, and optionally a formatter class or its - fully-qualified string name. If a formatter is not provided, the - formatter that would be used for `put` is assumed. On successful - ingest all `FileDataset.formatter` attributes will be set to the - formatter class used. `FileDataset.path` attributes may be modified - to put paths in whatever the datastore considers a standardized - form. - transfer : `str`, optional - If not `None`, must be one of 'auto', 'move', 'copy', 'direct', - 'split', 'hardlink', 'relsymlink' or 'symlink', indicating how to - transfer the file. - run : `str`, optional - The name of the run ingested datasets should be added to, - overriding ``self.run``. This parameter is now deprecated since - the run is encoded in the ``FileDataset``. - idGenerationMode : `DatasetIdGenEnum`, optional - Specifies option for generating dataset IDs. Parameter is - deprecated. - record_validation_info : `bool`, optional - If `True`, the default, the datastore can record validation - information associated with the file. If `False` the datastore - will not attempt to track any information such as checksums - or file sizes. This can be useful if such information is tracked - in an external system or if the file is to be compressed in place. - It is up to the datastore whether this parameter is relevant. - - Raises - ------ - TypeError - Raised if the butler is read-only or if no run was provided. - NotImplementedError - Raised if the `Datastore` does not support the given transfer mode. - DatasetTypeNotSupportedError - Raised if one or more files to be ingested have a dataset type that - is not supported by the `Datastore`.. - FileNotFoundError - Raised if one of the given files does not exist. - FileExistsError - Raised if transfer is not `None` but the (internal) location the - file would be moved to is already occupied. - - Notes - ----- - This operation is not fully exception safe: if a database operation - fails, the given `FileDataset` instances may be only partially updated. - - It is atomic in terms of database operations (they will either all - succeed or all fail) providing the database engine implements - transactions correctly. It will attempt to be atomic in terms of - filesystem operations as well, but this cannot be implemented - rigorously for most datastores. - """ + # Docstring inherited. if not self.isWriteable(): raise TypeError("Butler is read-only.") @@ -2122,49 +1744,7 @@ def export( format: str | None = None, transfer: str | None = None, ) -> Iterator[RepoExportContext]: - """Export datasets from the repository represented by this `Butler`. - - This method is a context manager that returns a helper object - (`RepoExportContext`) that is used to indicate what information from - the repository should be exported. - - Parameters - ---------- - directory : `str`, optional - Directory dataset files should be written to if ``transfer`` is not - `None`. - filename : `str`, optional - Name for the file that will include database information associated - with the exported datasets. If this is not an absolute path and - ``directory`` is not `None`, it will be written to ``directory`` - instead of the current working directory. Defaults to - "export.{format}". 
- format : `str`, optional - File format for the database information file. If `None`, the - extension of ``filename`` will be used. - transfer : `str`, optional - Transfer mode passed to `Datastore.export`. - - Raises - ------ - TypeError - Raised if the set of arguments passed is inconsistent. - - Examples - -------- - Typically the `Registry.queryDataIds` and `Registry.queryDatasets` - methods are used to provide the iterables over data IDs and/or datasets - to be exported:: - - with butler.export("exports.yaml") as export: - # Export all flats, but none of the dimension element rows - # (i.e. data ID information) associated with them. - export.saveDatasets(butler.registry.queryDatasets("flat"), - elements=()) - # Export all datasets that start with "deepCoadd_" and all of - # their associated data ID information. - export.saveDatasets(butler.registry.queryDatasets("deepCoadd_*")) - """ + # Docstring inherited. if directory is None and transfer is not None: raise TypeError("Cannot transfer without providing a directory.") if transfer == "move": @@ -2206,37 +1786,7 @@ def import_( transfer: str | None = None, skip_dimensions: set | None = None, ) -> None: - """Import datasets into this repository that were exported from a - different butler repository via `~lsst.daf.butler.Butler.export`. - - Parameters - ---------- - directory : `~lsst.resources.ResourcePathExpression`, optional - Directory containing dataset files to import from. If `None`, - ``filename`` and all dataset file paths specified therein must - be absolute. - filename : `~lsst.resources.ResourcePathExpression` or `TextIO` - A stream or name of file that contains database information - associated with the exported datasets, typically generated by - `~lsst.daf.butler.Butler.export`. If this a string (name) or - `~lsst.resources.ResourcePath` and is not an absolute path, - it will first be looked for relative to ``directory`` and if not - found there it will be looked for in the current working - directory. Defaults to "export.{format}". - format : `str`, optional - File format for ``filename``. If `None`, the extension of - ``filename`` will be used. - transfer : `str`, optional - Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. - skip_dimensions : `set`, optional - Names of dimensions that should be skipped and not imported. - - Raises - ------ - TypeError - Raised if the set of arguments passed is inconsistent, or if the - butler is read-only. - """ + # Docstring inherited. if not self.isWriteable(): raise TypeError("Butler is read-only.") if format is None: @@ -2306,48 +1856,7 @@ def transfer_from( register_dataset_types: bool = False, transfer_dimensions: bool = False, ) -> collections.abc.Collection[DatasetRef]: - """Transfer datasets to this Butler from a run in another Butler. - - Parameters - ---------- - source_butler : `LimitedButler` - Butler from which the datasets are to be transferred. If data IDs - in ``source_refs`` are not expanded then this has to be a full - `Butler` whose registry will be used to expand data IDs. - source_refs : iterable of `DatasetRef` - Datasets defined in the source butler that should be transferred to - this butler. - transfer : `str`, optional - Transfer mode passed to `~lsst.daf.butler.Datastore.transfer_from`. - skip_missing : `bool` - If `True`, datasets with no datastore artifact associated with - them are not transferred. If `False` a registry entry will be - created even if no datastore record is created (and so will - look equivalent to the dataset being unstored). 
- register_dataset_types : `bool` - If `True` any missing dataset types are registered. Otherwise - an exception is raised. - transfer_dimensions : `bool`, optional - If `True`, dimension record data associated with the new datasets - will be transferred. - - Returns - ------- - refs : `list` of `DatasetRef` - The refs added to this Butler. - - Notes - ----- - The datastore artifact has to exist for a transfer - to be made but non-existence is not an error. - - Datasets that already exist in this run will be skipped. - - The datasets are imported as part of a transaction, although - dataset types are registered before the transaction is started. - This means that it is possible for a dataset type to be registered - even though transfer has failed. - """ + # Docstring inherited. if not self.isWriteable(): raise TypeError("Butler is read-only.") progress = Progress("lsst.daf.butler.Butler.transfer_from", level=VERBOSE) @@ -2519,30 +2028,7 @@ def validateConfiguration( datasetTypeNames: Iterable[str] | None = None, ignore: Iterable[str] | None = None, ) -> None: - """Validate butler configuration. - - Checks that each `DatasetType` can be stored in the `Datastore`. - - Parameters - ---------- - logFailures : `bool`, optional - If `True`, output a log message for every validation error - detected. - datasetTypeNames : iterable of `str`, optional - The `DatasetType` names that should be checked. This allows - only a subset to be selected. - ignore : iterable of `str`, optional - Names of DatasetTypes to skip over. This can be used to skip - known problems. If a named `DatasetType` corresponds to a - composite, all components of that `DatasetType` will also be - ignored. - - Raises - ------ - ButlerValidationError - Raised if there is some inconsistency with how this Butler - is configured. - """ + # Docstring inherited. if datasetTypeNames: datasetTypes = [self._registry.getDatasetType(name) for name in datasetTypeNames] else: diff --git a/python/lsst/daf/butler/script/_associate.py b/python/lsst/daf/butler/script/_associate.py index ef6ceb878d..5e1943b981 100644 --- a/python/lsst/daf/butler/script/_associate.py +++ b/python/lsst/daf/butler/script/_associate.py @@ -42,7 +42,7 @@ def associate( find_first: bool, ) -> None: """Add existing datasets to a CHAINED collection.""" - butler = Butler(repo, writeable=True) + butler = Butler.from_config(repo, writeable=True) butler.registry.registerCollection(collection, CollectionType.TAGGED) diff --git a/python/lsst/daf/butler/script/_pruneDatasets.py b/python/lsst/daf/butler/script/_pruneDatasets.py index 9b1c318af2..17f27d9bc7 100644 --- a/python/lsst/daf/butler/script/_pruneDatasets.py +++ b/python/lsst/daf/butler/script/_pruneDatasets.py @@ -218,7 +218,7 @@ def pruneDatasets( if not collections: return PruneDatasetsResult(state=PruneDatasetsResult.State.ERR_NO_COLLECTION_RESTRICTION) - butler = Butler(repo) + butler = Butler.from_config(repo) # If purging, verify that the collection to purge is RUN type collection. 
if purge_run: @@ -253,7 +253,7 @@ def pruneDatasets( return result def doPruneDatasets() -> PruneDatasetsResult: - butler = Butler(repo, writeable=True) + butler = Butler.from_config(repo, writeable=True) butler.pruneDatasets( refs=datasets_found.getDatasets(), disassociate=disassociate, diff --git a/python/lsst/daf/butler/script/butlerImport.py b/python/lsst/daf/butler/script/butlerImport.py index 37aba0662c..a4af72b85a 100644 --- a/python/lsst/daf/butler/script/butlerImport.py +++ b/python/lsst/daf/butler/script/butlerImport.py @@ -59,7 +59,7 @@ def butlerImport( skip_dimensions : `list`, or `None` Dimensions that should be skipped. """ - butler = Butler(repo, writeable=True) + butler = Butler.from_config(repo, writeable=True) if skip_dimensions is not None: skip_dimensions = set(skip_dimensions) diff --git a/python/lsst/daf/butler/script/certifyCalibrations.py b/python/lsst/daf/butler/script/certifyCalibrations.py index 6f99f0fc06..42bdb53458 100644 --- a/python/lsst/daf/butler/script/certifyCalibrations.py +++ b/python/lsst/daf/butler/script/certifyCalibrations.py @@ -69,7 +69,7 @@ def certifyCalibrations( Search all children of the inputCollection if it is a CHAINED collection, instead of just the most recent one. """ - butler = Butler(repo, writeable=True, without_datastore=True) + butler = Butler.from_config(repo, writeable=True, without_datastore=True) registry = butler.registry timespan = Timespan( begin=astropy.time.Time(begin_date, scale="tai") if begin_date is not None else None, diff --git a/python/lsst/daf/butler/script/collectionChain.py b/python/lsst/daf/butler/script/collectionChain.py index ba6d53ecd5..888baede11 100644 --- a/python/lsst/daf/butler/script/collectionChain.py +++ b/python/lsst/daf/butler/script/collectionChain.py @@ -71,7 +71,7 @@ def collectionChain( chain : `tuple` of `str` The collections in the chain following this command. """ - butler = Butler(repo, writeable=True, without_datastore=True) + butler = Butler.from_config(repo, writeable=True, without_datastore=True) # Every mode needs children except pop. if not children and mode != "pop": diff --git a/python/lsst/daf/butler/script/configValidate.py b/python/lsst/daf/butler/script/configValidate.py index 83b6c0b921..2f71319302 100644 --- a/python/lsst/daf/butler/script/configValidate.py +++ b/python/lsst/daf/butler/script/configValidate.py @@ -52,7 +52,7 @@ def configValidate(repo: str, quiet: bool, dataset_type: list[str], ignore: list error. """ logFailures = not quiet - butler = Butler(config=repo) + butler = Butler.from_config(config=repo) is_good = True try: butler.validateConfiguration(logFailures=logFailures, datasetTypeNames=dataset_type, ignore=ignore) diff --git a/python/lsst/daf/butler/script/exportCalibs.py b/python/lsst/daf/butler/script/exportCalibs.py index ae28e8b35d..1406f0a132 100644 --- a/python/lsst/daf/butler/script/exportCalibs.py +++ b/python/lsst/daf/butler/script/exportCalibs.py @@ -122,7 +122,7 @@ def exportCalibs( RuntimeError : Raised if the output directory already exists. """ - butler = Butler(repo, writeable=False) + butler = Butler.from_config(repo, writeable=False) dataset_type_query = dataset_type or ... collections_query = collections or ... 
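The script updates above all follow the same pattern: butlers are now obtained through the class-level factory rather than by calling the (now abstract) ``Butler`` constructor directly. A minimal sketch of that pattern, using a hypothetical repository path and run name (not part of the patch)::

    from lsst.daf.butler import Butler

    repo = "/path/to/repo"  # hypothetical repository, for illustration only

    # Read-only butler, e.g. for queries.
    read_butler = Butler.from_config(repo, writeable=False)

    # Read-write butler that inserts new datasets into a RUN collection.
    write_butler = Butler.from_config(repo, run="u/example/test-run")

Scripts that only need the registry also pass ``without_datastore=True``, as several of the commands above do, to avoid constructing a datastore.
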
diff --git a/python/lsst/daf/butler/script/ingest_files.py b/python/lsst/daf/butler/script/ingest_files.py index df51f25da6..e4e645229b 100644 --- a/python/lsst/daf/butler/script/ingest_files.py +++ b/python/lsst/daf/butler/script/ingest_files.py @@ -105,7 +105,7 @@ def ingest_files( id_gen_mode = DatasetIdGenEnum.__members__[id_generation_mode] # Create the butler with the relevant run attached. - butler = Butler(repo, run=run) + butler = Butler.from_config(repo, run=run) datasetType = butler.registry.getDatasetType(dataset_type) diff --git a/python/lsst/daf/butler/script/queryCollections.py b/python/lsst/daf/butler/script/queryCollections.py index a0977d1d97..4358d23c7d 100644 --- a/python/lsst/daf/butler/script/queryCollections.py +++ b/python/lsst/daf/butler/script/queryCollections.py @@ -68,7 +68,7 @@ def _getTable( names=("Name", typeCol, descriptionCol), dtype=(str, str, str), ) - butler = Butler(repo) + butler = Butler.from_config(repo) names = sorted( butler.registry.queryCollections(collectionTypes=frozenset(collection_type), expression=glob or ...) ) @@ -140,7 +140,7 @@ def _getTree( names=("Name", "Type"), dtype=(str, str), ) - butler = Butler(repo, without_datastore=True) + butler = Butler.from_config(repo, without_datastore=True) def addCollection(name: str, level: int = 0) -> None: collectionType = butler.registry.getCollectionType(name) @@ -168,7 +168,7 @@ def _getFlatten( glob: Iterable[str], collection_type: Iterable[CollectionType], ) -> Table: - butler = Butler(repo) + butler = Butler.from_config(repo) collectionNames = list( butler.registry.queryCollections( collectionTypes=frozenset(collection_type), flattenChains=True, expression=glob or ... diff --git a/python/lsst/daf/butler/script/queryDataIds.py b/python/lsst/daf/butler/script/queryDataIds.py index cb70f114d3..415d2652d8 100644 --- a/python/lsst/daf/butler/script/queryDataIds.py +++ b/python/lsst/daf/butler/script/queryDataIds.py @@ -34,8 +34,9 @@ import numpy as np from astropy.table import Table as AstropyTable -from .._butler import Butler, DataCoordinate +from .._butler import Butler from ..cli.utils import sortAstropyTable +from ..dimensions import DataCoordinate if TYPE_CHECKING: from lsst.daf.butler import DimensionGraph @@ -109,7 +110,7 @@ def queryDataIds( Docstring for supported parameters is the same as `~lsst.daf.butler.Registry.queryDataIds`. """ - butler = Butler(repo, without_datastore=True) + butler = Butler.from_config(repo, without_datastore=True) if datasets and collections and not dimensions: # Determine the dimensions relevant to all given dataset types. diff --git a/python/lsst/daf/butler/script/queryDatasetTypes.py b/python/lsst/daf/butler/script/queryDatasetTypes.py index 4c1eafd5e2..efe9aeaeb0 100644 --- a/python/lsst/daf/butler/script/queryDatasetTypes.py +++ b/python/lsst/daf/butler/script/queryDatasetTypes.py @@ -61,7 +61,7 @@ def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str], components: A dict whose key is "datasetTypes" and whose value is a list of collection names. """ - butler = Butler(repo, without_datastore=True) + butler = Butler.from_config(repo, without_datastore=True) expression = glob or ... 
datasetTypes = butler.registry.queryDatasetTypes(components=components, expression=expression) if verbose: diff --git a/python/lsst/daf/butler/script/queryDatasets.py b/python/lsst/daf/butler/script/queryDatasets.py index e6b17a79ca..4a7cac38f3 100644 --- a/python/lsst/daf/butler/script/queryDatasets.py +++ b/python/lsst/daf/butler/script/queryDatasets.py @@ -175,7 +175,7 @@ def __init__( raise RuntimeError("One of repo and butler must be provided and the other must be None.") # show_uri requires a datastore. without_datastore = not show_uri - self.butler = butler or Butler(repo, without_datastore=without_datastore) + self.butler = butler or Butler.from_config(repo, without_datastore=without_datastore) self._getDatasets(glob, collections, where, find_first) self.showUri = show_uri diff --git a/python/lsst/daf/butler/script/queryDimensionRecords.py b/python/lsst/daf/butler/script/queryDimensionRecords.py index 8f26af86be..88197cf2bf 100644 --- a/python/lsst/daf/butler/script/queryDimensionRecords.py +++ b/python/lsst/daf/butler/script/queryDimensionRecords.py @@ -54,7 +54,7 @@ def queryDimensionRecords( `~lsst.daf.butler.Registry.queryDimensionRecords` except for ``no_check``, which is the inverse of ``check``. """ - butler = Butler(repo, without_datastore=True) + butler = Butler.from_config(repo, without_datastore=True) query_collections: Iterable[str] | EllipsisType | None = None if datasets: diff --git a/python/lsst/daf/butler/script/register_dataset_type.py b/python/lsst/daf/butler/script/register_dataset_type.py index 4de6f31a6d..f46fda8817 100644 --- a/python/lsst/daf/butler/script/register_dataset_type.py +++ b/python/lsst/daf/butler/script/register_dataset_type.py @@ -69,7 +69,7 @@ def register_dataset_type( be created by this command. They are always derived from the composite dataset type. """ - butler = Butler(repo, writeable=True, without_datastore=True) + butler = Butler.from_config(repo, writeable=True, without_datastore=True) composite, component = DatasetType.splitDatasetTypeName(dataset_type) if component: diff --git a/python/lsst/daf/butler/script/removeCollections.py b/python/lsst/daf/butler/script/removeCollections.py index e0ee80e21d..8dc49015ed 100644 --- a/python/lsst/daf/butler/script/removeCollections.py +++ b/python/lsst/daf/butler/script/removeCollections.py @@ -82,7 +82,7 @@ def _getCollectionInfo( collectionInfo : `CollectionInfo` Contains tables with run and non-run collection info. """ - butler = Butler(repo, without_datastore=True) + butler = Butler.from_config(repo, without_datastore=True) try: names = sorted( butler.registry.queryCollections( @@ -135,7 +135,7 @@ def removeCollections( def doRemove(collections: Table) -> None: """Perform the prune collection step.""" - butler = Butler(repo, writeable=True, without_datastore=True) + butler = Butler.from_config(repo, writeable=True, without_datastore=True) for name in collections["Collection"]: butler.registry.removeCollection(name) diff --git a/python/lsst/daf/butler/script/removeDatasetType.py b/python/lsst/daf/butler/script/removeDatasetType.py index 3279a6cc6e..4fe9e020b3 100644 --- a/python/lsst/daf/butler/script/removeDatasetType.py +++ b/python/lsst/daf/butler/script/removeDatasetType.py @@ -43,5 +43,5 @@ def removeDatasetType(repo: str, dataset_type_name: tuple[str, ...]) -> None: datasetTypeName : `str` The name of the dataset type to be removed. 
""" - butler = Butler(repo, writeable=True, without_datastore=True) + butler = Butler.from_config(repo, writeable=True, without_datastore=True) butler.registry.removeDatasetType(dataset_type_name) diff --git a/python/lsst/daf/butler/script/removeRuns.py b/python/lsst/daf/butler/script/removeRuns.py index 8259f9984e..1186e53b05 100644 --- a/python/lsst/daf/butler/script/removeRuns.py +++ b/python/lsst/daf/butler/script/removeRuns.py @@ -85,7 +85,7 @@ def _getCollectionInfo( datasets : `dict` [`str`, `int`] The dataset types and and how many will be removed. """ - butler = Butler(repo) + butler = Butler.from_config(repo) try: collectionNames = list( butler.registry.queryCollections( @@ -132,7 +132,7 @@ def removeRuns( def doRemove(runs: Sequence[RemoveRun]) -> None: """Perform the remove step.""" - butler = Butler(repo, writeable=True) + butler = Butler.from_config(repo, writeable=True) with butler.transaction(): for run in runs: for parent in run.parents: diff --git a/python/lsst/daf/butler/script/retrieveArtifacts.py b/python/lsst/daf/butler/script/retrieveArtifacts.py index 10edf446ac..01a4d4a11f 100644 --- a/python/lsst/daf/butler/script/retrieveArtifacts.py +++ b/python/lsst/daf/butler/script/retrieveArtifacts.py @@ -86,7 +86,7 @@ def retrieveArtifacts( query_types = dataset_type or ... query_collections: tuple[str, ...] | EllipsisType = collections or ... - butler = Butler(repo, writeable=False) + butler = Butler.from_config(repo, writeable=False) # Need to store in list so we can count the number to give some feedback # to caller. diff --git a/python/lsst/daf/butler/script/transferDatasets.py b/python/lsst/daf/butler/script/transferDatasets.py index c63835e109..845f37b87d 100644 --- a/python/lsst/daf/butler/script/transferDatasets.py +++ b/python/lsst/daf/butler/script/transferDatasets.py @@ -74,8 +74,8 @@ def transferDatasets( datasets. It can be more efficient to disable this if it is known that all dimensions exist. """ - source_butler = Butler(source, writeable=False) - dest_butler = Butler(dest, writeable=True) + source_butler = Butler.from_config(source, writeable=False) + dest_butler = Butler.from_config(dest, writeable=True) dataset_type_expr = dataset_type or ... collections_expr: tuple[str, ...] | EllipsisType = collections or ... 
diff --git a/python/lsst/daf/butler/server.py b/python/lsst/daf/butler/server.py index 7ee3a387f5..1839838954 100644 --- a/python/lsst/daf/butler/server.py +++ b/python/lsst/daf/butler/server.py @@ -84,21 +84,21 @@ def _generate_next_value_(name, start, count, last_values) -> str: # type: igno def _make_global_butler() -> None: global GLOBAL_READONLY_BUTLER, GLOBAL_READWRITE_BUTLER if GLOBAL_READONLY_BUTLER is None: - GLOBAL_READONLY_BUTLER = Butler(BUTLER_ROOT, writeable=False) + GLOBAL_READONLY_BUTLER = Butler.from_config(BUTLER_ROOT, writeable=False) if GLOBAL_READWRITE_BUTLER is None: - GLOBAL_READWRITE_BUTLER = Butler(BUTLER_ROOT, writeable=True) + GLOBAL_READWRITE_BUTLER = Butler.from_config(BUTLER_ROOT, writeable=True) def butler_readonly_dependency() -> Butler: """Return global read-only butler.""" _make_global_butler() - return Butler(butler=GLOBAL_READONLY_BUTLER) + return Butler.from_config(butler=GLOBAL_READONLY_BUTLER) def butler_readwrite_dependency() -> Butler: """Return read-write butler.""" _make_global_butler() - return Butler(butler=GLOBAL_READWRITE_BUTLER) + return Butler.from_config(butler=GLOBAL_READWRITE_BUTLER) def unpack_dataId(butler: Butler, data_id: SerializedDataCoordinate | None) -> DataCoordinate | None: diff --git a/python/lsst/daf/butler/tests/_testRepo.py b/python/lsst/daf/butler/tests/_testRepo.py index af121db9e6..eba08df974 100644 --- a/python/lsst/daf/butler/tests/_testRepo.py +++ b/python/lsst/daf/butler/tests/_testRepo.py @@ -116,7 +116,7 @@ def makeTestRepo( # not be ignored. # newConfig guards against location-related keywords like outfile newConfig = Butler.makeRepo(root, config=defaults, forceConfigRoot=False, **kwargs) - butler = Butler(newConfig, writeable=True) + butler = Butler.from_config(newConfig, writeable=True) dimensionRecords = _makeRecords(dataIds, butler.dimensions) for dimension, records in dimensionRecords.items(): if butler.dimensions[dimension].viewOf is None: @@ -154,7 +154,7 @@ def makeTestCollection(repo: Butler, uniqueId: str | None = None) -> Butler: # Speed matters more than cryptographic guarantees uniqueId = str(random.randrange(1_000_000_000)) collection = "test_" + uniqueId - return Butler(butler=repo, run=collection) + return Butler.from_config(butler=repo, run=collection) def _makeRecords(dataIds: Mapping[str, Iterable], universe: DimensionUniverse) -> Mapping[str, Iterable]: diff --git a/python/lsst/daf/butler/tests/utils.py b/python/lsst/daf/butler/tests/utils.py index 802498a0d0..fe1ccc7965 100644 --- a/python/lsst/daf/butler/tests/utils.py +++ b/python/lsst/daf/butler/tests/utils.py @@ -243,7 +243,7 @@ def __init__(self, root: str, configFile: str) -> None: # tag when looking up datasets. 
run = "ingest/run" tag = "ingest" - self.butler = Butler(butlerConfigFile, run=run, collections=[tag]) + self.butler = Butler.from_config(butlerConfigFile, run=run, collections=[tag]) self.butler.registry.registerCollection(tag, CollectionType.TAGGED) # Create and register a DatasetType diff --git a/tests/test_butler.py b/tests/test_butler.py index 9d185221ce..0dda39a51f 100644 --- a/tests/test_butler.py +++ b/tests/test_butler.py @@ -87,6 +87,7 @@ def mock_s3(*args: Any, **kwargs: Any) -> Any: # type: ignore[no-untyped-def] from lsst.daf.butler.datastore import NullDatastore from lsst.daf.butler.datastore.file_templates import FileTemplate, FileTemplateValidationError from lsst.daf.butler.datastores.fileDatastore import FileDatastore +from lsst.daf.butler.direct_butler import DirectButler from lsst.daf.butler.registries.sql import SqlRegistry from lsst.daf.butler.registry import ( CollectionError, @@ -210,8 +211,9 @@ def tearDown(self) -> None: def create_butler( self, run: str, storageClass: StorageClass | str, datasetTypeName: str - ) -> tuple[Butler, DatasetType]: - butler = Butler(self.tmpConfigFile, run=run) + ) -> tuple[DirectButler, DatasetType]: + butler = Butler.from_config(self.tmpConfigFile, run=run) + assert isinstance(butler, DirectButler), "Expect DirectButler in configuration" collections = set(butler.registry.queryCollections()) self.assertEqual(collections, {run}) @@ -258,7 +260,7 @@ def create_butler( ) return butler, datasetType - def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> Butler: + def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> DirectButler: # New datasets will be added to run and tag, but we will only look in # tag when looking up datasets. run = self.default_run @@ -512,7 +514,7 @@ def runPutGetTest(self, storageClass: StorageClass, datasetTypeName: str) -> But def testDeferredCollectionPassing(self) -> None: # Construct a butler with no run or collection, but make it writeable. - butler = Butler(self.tmpConfigFile, writeable=True) + butler = Butler.from_config(self.tmpConfigFile, writeable=True) # Create and register a DatasetType dimensions = butler.dimensions.extract(["instrument", "visit"]) datasetType = self.addDatasetType( @@ -576,17 +578,17 @@ def setUp(self) -> None: def testConstructor(self) -> None: """Independent test of constructor.""" - butler = Butler(self.tmpConfigFile, run=self.default_run) + butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) self.assertIsInstance(butler, Butler) # Check that butler.yaml is added automatically. if self.tmpConfigFile.endswith(end := "/butler.yaml"): config_dir = self.tmpConfigFile[: -len(end)] - butler = Butler(config_dir, run=self.default_run) + butler = Butler.from_config(config_dir, run=self.default_run) self.assertIsInstance(butler, Butler) # Even with a ResourcePath. - butler = Butler(ResourcePath(config_dir, forceDirectory=True), run=self.default_run) + butler = Butler.from_config(ResourcePath(config_dir, forceDirectory=True), run=self.default_run) self.assertIsInstance(butler, Butler) collections = set(butler.registry.queryCollections()) @@ -594,11 +596,11 @@ def testConstructor(self) -> None: # Check that some special characters can be included in run name. 
special_run = "u@b.c-A" - butler_special = Butler(butler=butler, run=special_run) + butler_special = Butler.from_config(butler=butler, run=special_run) collections = set(butler_special.registry.queryCollections("*@*")) self.assertEqual(collections, {special_run}) - butler2 = Butler(butler=butler, collections=["other"]) + butler2 = Butler.from_config(butler=butler, collections=["other"]) self.assertEqual(butler2.collections, ("other",)) self.assertIsNone(butler2.run) self.assertIs(butler._datastore, butler2._datastore) @@ -619,17 +621,17 @@ def testConstructor(self) -> None: uri = Butler.get_repo_uri("bad_label") self.assertEqual(uri, ResourcePath(bad_label)) uri = Butler.get_repo_uri("label") - butler = Butler(uri, writeable=False) + butler = Butler.from_config(uri, writeable=False) self.assertIsInstance(butler, Butler) - butler = Butler("label", writeable=False) + butler = Butler.from_config("label", writeable=False) self.assertIsInstance(butler, Butler) with self.assertRaisesRegex(FileNotFoundError, "aliases:.*bad_label"): - Butler("not_there", writeable=False) + Butler.from_config("not_there", writeable=False) with self.assertRaisesRegex(FileNotFoundError, "resolved from alias 'bad_label'"): - Butler("bad_label") + Butler.from_config("bad_label") with self.assertRaises(FileNotFoundError): # Should ignore aliases. - Butler(ResourcePath("label", forceAbsolute=False)) + Butler.from_config(ResourcePath("label", forceAbsolute=False)) with self.assertRaises(KeyError) as cm: Butler.get_repo_uri("missing") self.assertEqual( @@ -644,24 +646,24 @@ def testConstructor(self) -> None: butler_index.dumpToUri(temp_file) with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}): with self.assertRaisesRegex(FileNotFoundError, "(no known aliases)"): - Butler("label") + Butler.from_config("label") with ResourcePath.temporary_uri(suffix=suffix) as temp_file: # Now with bad contents. with open(temp_file.ospath, "w") as fh: print("'", file=fh) with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": str(temp_file)}): with self.assertRaisesRegex(FileNotFoundError, "(no known aliases:.*could not be read)"): - Butler("label") + Butler.from_config("label") with unittest.mock.patch.dict(os.environ, {"DAF_BUTLER_REPOSITORY_INDEX": "file://not_found/x.yaml"}): with self.assertRaises(FileNotFoundError): Butler.get_repo_uri("label") self.assertEqual(Butler.get_known_repos(), set()) with self.assertRaisesRegex(FileNotFoundError, "index file not found"): - Butler("label") + Butler.from_config("label") # Check that we can create Butler when the alias file is not found. - butler = Butler(self.tmpConfigFile, writeable=False) + butler = Butler.from_config(self.tmpConfigFile, writeable=False) self.assertIsInstance(butler, Butler) with self.assertRaises(KeyError) as cm: # No environment variable set. @@ -670,7 +672,7 @@ def testConstructor(self) -> None: self.assertIn("No repository index defined", str(cm.exception)) with self.assertRaisesRegex(FileNotFoundError, "no known aliases.*No repository index"): # No aliases registered. 
- Butler("not_there") + Butler.from_config("not_there") self.assertEqual(Butler.get_known_repos(), set()) def testBasicPutGet(self) -> None: @@ -842,7 +844,7 @@ def testPytypePutCoercion(self) -> None: self.assertEqual(get_full_type_name(test_dict3), "dict") def testIngest(self) -> None: - butler = Butler(self.tmpConfigFile, run=self.default_run) + butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) # Create and register a DatasetType dimensions = butler.dimensions.extract(["instrument", "visit", "detector"]) @@ -994,7 +996,8 @@ def testIngest(self) -> None: def testPickle(self) -> None: """Test pickle support.""" - butler = Butler(self.tmpConfigFile, run=self.default_run) + butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) + assert isinstance(butler, DirectButler), "Expect DirectButler in configuration" butlerOut = pickle.loads(pickle.dumps(butler)) self.assertIsInstance(butlerOut, Butler) self.assertEqual(butlerOut._config, butler._config) @@ -1002,7 +1005,7 @@ def testPickle(self) -> None: self.assertEqual(butlerOut.run, butler.run) def testGetDatasetTypes(self) -> None: - butler = Butler(self.tmpConfigFile, run=self.default_run) + butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) dimensions = butler.dimensions.extract(["instrument", "visit", "physical_filter"]) dimensionEntries: list[tuple[str, list[Mapping[str, Any]]]] = [ ( @@ -1076,7 +1079,7 @@ def testGetDatasetTypes(self) -> None: ) def testTransaction(self) -> None: - butler = Butler(self.tmpConfigFile, run=self.default_run) + butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) datasetTypeName = "test_metric" dimensions = butler.dimensions.extract(["instrument", "visit"]) dimensionEntries: tuple[tuple[str, Mapping[str, Any]], ...] = ( @@ -1133,10 +1136,12 @@ def testMakeRepo(self) -> None: butlerConfig = Butler.makeRepo(root1, config=Config(self.configFile)) limited = Config(self.configFile) - butler1 = Butler(butlerConfig) + butler1 = Butler.from_config(butlerConfig) + assert isinstance(butler1, DirectButler), "Expect DirectButler in configuration" butlerConfig = Butler.makeRepo(root2, standalone=True, config=Config(self.configFile)) full = Config(self.tmpConfigFile) - butler2 = Butler(butlerConfig) + butler2 = Butler.from_config(butlerConfig) + assert isinstance(butler2, DirectButler), "Expect DirectButler in configuration" # Butlers should have the same configuration regardless of whether # defaults were expanded. 
self.assertEqual(butler1._config, butler2._config) @@ -1156,13 +1161,13 @@ def testMakeRepo(self) -> None: # work properly with relocatable Butler repo butlerConfig.configFile = None with self.assertRaises(ValueError): - Butler(butlerConfig) + Butler.from_config(butlerConfig) with self.assertRaises(FileExistsError): Butler.makeRepo(self.root, standalone=True, config=Config(self.configFile), overwrite=False) def testStringification(self) -> None: - butler = Butler(self.tmpConfigFile, run=self.default_run) + butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) butlerStr = str(butler) if self.datastoreStr is not None: @@ -1178,7 +1183,7 @@ def testStringification(self) -> None: def testButlerRewriteDataId(self) -> None: """Test that dataIds can be rewritten based on dimension records.""" - butler = Butler(self.tmpConfigFile, run=self.default_run) + butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict") datasetTypeName = "random_data" @@ -1244,7 +1249,7 @@ def checkFileExists(self, root: str | ResourcePath, relpath: str | ResourcePath) def testPutTemplates(self) -> None: storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") - butler = Butler(self.tmpConfigFile, run=self.default_run) + butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) # Add needed Dimensions butler.registry.insertDimensionData("instrument", {"name": "DummyCamComp"}) @@ -1380,7 +1385,7 @@ def runImportExportTest(self, storageClass: StorageClass) -> None: transfer="auto", skip_dimensions=None, ) - importButler = Butler(importDir, run=self.default_run) + importButler = Butler.from_config(importDir, run=self.default_run) for ref in datasets: with self.subTest(ref=ref): # Test for existence by passing in the DatasetType and @@ -1393,7 +1398,7 @@ def runImportExportTest(self, storageClass: StorageClass) -> None: def testRemoveRuns(self) -> None: storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") - butler = Butler(self.tmpConfigFile, writeable=True) + butler = Butler.from_config(self.tmpConfigFile, writeable=True) # Load registry data with dimensions to hang datasets off of. 
registryDataDir = os.path.normpath(os.path.join(os.path.dirname(__file__), "data", "registry")) butler.import_(filename=os.path.join(registryDataDir, "base.yaml")) @@ -1453,12 +1458,12 @@ class PosixDatastoreButlerTestCase(FileDatastoreButlerTests, unittest.TestCase): def testPathConstructor(self) -> None: """Independent test of constructor using PathLike.""" - butler = Butler(self.tmpConfigFile, run=self.default_run) + butler = Butler.from_config(self.tmpConfigFile, run=self.default_run) self.assertIsInstance(butler, Butler) # And again with a Path object with the butler yaml path = pathlib.Path(self.tmpConfigFile) - butler = Butler(path, writeable=False) + butler = Butler.from_config(path, writeable=False) self.assertIsInstance(butler, Butler) # And again with a Path object without the butler yaml @@ -1466,7 +1471,7 @@ def testPathConstructor(self) -> None: # in butler.yaml -- which is the case for a subclass) if self.tmpConfigFile.endswith("butler.yaml"): path = pathlib.Path(os.path.dirname(self.tmpConfigFile)) - butler = Butler(path, writeable=False) + butler = Butler.from_config(path, writeable=False) self.assertIsInstance(butler, Butler) def testExportTransferCopy(self) -> None: @@ -1500,7 +1505,7 @@ def testExportTransferCopy(self) -> None: def testPruneDatasets(self) -> None: storageClass = self.storageClassFactory.getStorageClass("StructuredDataNoComponents") - butler = Butler(self.tmpConfigFile, writeable=True) + butler = Butler.from_config(self.tmpConfigFile, writeable=True) assert isinstance(butler._datastore, FileDatastore) # Load registry data with dimensions to hang datasets off of. registryDataDir = os.path.normpath(os.path.join(TESTDIR, "data", "registry")) @@ -2064,7 +2069,9 @@ def tearDown(self) -> None: def create_butler(self, manager: str, label: str) -> Butler: config = Config(self.configFile) config["registry", "managers", "datasets"] = manager - return Butler(Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True) + return Butler.from_config( + Butler.makeRepo(f"{self.root}/butler{label}", config=config), writeable=True + ) def create_butlers(self, manager1: str | None = None, manager2: str | None = None) -> None: default = "lsst.daf.butler.registry.datasets.byDimensions.ByDimensionsDatasetRecordStorageManagerUUID" @@ -2192,7 +2199,7 @@ def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "St # we are rewriting integer dataset ids in the target if necessary. # Will not be relevant for UUID. run = "distraction" - butler = Butler(butler=self.source_butler, run=run) + butler = Butler.from_config(butler=self.source_butler, run=run) butler.put( makeExampleMetrics(), datasetTypeName, @@ -2202,7 +2209,7 @@ def assertButlerTransfers(self, purge: bool = False, storageClassName: str = "St ) # Write some example metrics to the source - butler = Butler(butler=self.source_butler) + butler = Butler.from_config(butler=self.source_butler) # Set of DatasetRefs that should be in the list of refs to transfer # but which will not be transferred. @@ -2383,9 +2390,9 @@ def test_fallback(self) -> None: bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore" with self.assertRaises(RuntimeError): - Butler(bad_config) + Butler.from_config(bad_config) - butler = Butler(bad_config, writeable=True, without_datastore=True) + butler = Butler.from_config(bad_config, writeable=True, without_datastore=True) self.assertIsInstance(butler._datastore, NullDatastore) # Check that registry is working. 
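The test changes above repeatedly use two idioms worth noting: a new repository is created with ``Butler.makeRepo`` and then opened with ``Butler.from_config``, and tests that need ``DirectButler``-specific attributes (such as the private ``_config``) first narrow the type with an ``isinstance`` check, since the factory is annotated as returning the abstract ``Butler``. A minimal sketch, with a hypothetical root directory::

    from lsst.daf.butler import Butler
    from lsst.daf.butler.direct_butler import DirectButler

    root = "/tmp/example_repo"  # hypothetical location, for illustration only
    config = Butler.makeRepo(root)

    butler = Butler.from_config(config, writeable=True, run="test_run")
    assert isinstance(butler, DirectButler), "Expect DirectButler in configuration"

    # After narrowing, DirectButler-only attributes (private, mirrored here
    # only because the tests use them) are visible to mypy.
    print(butler._config.configFile)
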
diff --git a/tests/test_cliCmdIngestFiles.py b/tests/test_cliCmdIngestFiles.py index fbb48f7ef0..29b9730297 100644 --- a/tests/test_cliCmdIngestFiles.py +++ b/tests/test_cliCmdIngestFiles.py @@ -104,7 +104,7 @@ def assertIngest(self, table, options): ) self.assertEqual(result.exit_code, 0, clickResultMsg(result)) - butler = Butler(self.root) + butler = Butler.from_config(self.root) refs = list(butler.registry.queryDatasets("test_metric_comp", collections=run)) self.assertEqual(len(refs), 2) diff --git a/tests/test_cliCmdPruneDatasets.py b/tests/test_cliCmdPruneDatasets.py index 7d4c5901a9..e77961994d 100644 --- a/tests/test_cliCmdPruneDatasets.py +++ b/tests/test_cliCmdPruneDatasets.py @@ -35,7 +35,6 @@ import lsst.daf.butler.registries.sql import lsst.daf.butler.script from astropy.table import Table -from lsst.daf.butler import Butler from lsst.daf.butler.cli.butler import cli as butlerCli from lsst.daf.butler.cli.cmd.commands import ( pruneDatasets_askContinueMsg, @@ -54,6 +53,7 @@ pruneDatasets_wouldRemoveMsg, ) from lsst.daf.butler.cli.utils import LogCliRunner, astropyTablesToStr, clickResultMsg +from lsst.daf.butler.direct_butler import DirectButler from lsst.daf.butler.registry import CollectionType from lsst.daf.butler.script import QueryDatasets @@ -118,7 +118,7 @@ def makePruneDatasetsArgs(**kwargs): @patch.object(lsst.daf.butler.script._pruneDatasets, "QueryDatasets", side_effect=makeQueryDatasets) # Mock the pruneDatasets butler command so we can test for expected calls # to it, without dealing with setting up a full repo with data for it. - @patch.object(Butler, "pruneDatasets") + @patch.object(DirectButler, "pruneDatasets") def run_test( self, mockPruneDatasets, diff --git a/tests/test_cliCmdQueryCollections.py b/tests/test_cliCmdQueryCollections.py index 1d88b40e1d..47eeb16cfa 100644 --- a/tests/test_cliCmdQueryCollections.py +++ b/tests/test_cliCmdQueryCollections.py @@ -98,7 +98,7 @@ def testGetCollections(self): with self.runner.isolated_filesystem(): butlerCfg = Butler.makeRepo("here") # the purpose of this call is to create some collections - butler = Butler(butlerCfg, run=run, collections=[tag], writeable=True) + butler = Butler.from_config(butlerCfg, run=run, collections=[tag], writeable=True) butler.registry.registerCollection(tag, CollectionType.TAGGED) # Verify collections that were created are found by @@ -140,7 +140,7 @@ def testChained(self): # Create a butler and add some chained collections: butlerCfg = Butler.makeRepo("here") - butler1 = Butler(butlerCfg, writeable=True) + butler1 = Butler.from_config(butlerCfg, writeable=True) # Replace datastore functions with mocks: DatastoreMock.apply(butler1) diff --git a/tests/test_cliCmdQueryDataIds.py b/tests/test_cliCmdQueryDataIds.py index f0535ab2ac..56cfa69e49 100644 --- a/tests/test_cliCmdQueryDataIds.py +++ b/tests/test_cliCmdQueryDataIds.py @@ -70,7 +70,7 @@ def loadData(self, *filenames: str) -> Butler: """Load registry test data from ``TESTDIR/data/registry/``, which should be a YAML import/export file. 
""" - butler = Butler(self.repo, writeable=True) + butler = Butler.from_config(self.repo, writeable=True) for filename in filenames: with open(os.path.join(TESTDIR, "data", "registry", filename)) as stream: # Go behind the back of the import code a bit to deal with diff --git a/tests/test_cliCmdQueryDimensionRecords.py b/tests/test_cliCmdQueryDimensionRecords.py index 3f982f5789..876a77453d 100644 --- a/tests/test_cliCmdQueryDimensionRecords.py +++ b/tests/test_cliCmdQueryDimensionRecords.py @@ -166,7 +166,7 @@ def testWhere(self): self.assertAstropyTablesEqual(readTable(result.output), expected) def testCollection(self): - butler = Butler(self.root, run="foo") + butler = Butler.from_config(self.root, run="foo") # try replacing the testRepo's butler with the one with the "foo" run. self.testRepo.butler = butler @@ -273,7 +273,7 @@ def testCollection(self): self.assertAstropyTablesEqual(readTable(result.output), expected) def testSkymap(self): - butler = Butler(self.root, run="foo") + butler = Butler.from_config(self.root, run="foo") # try replacing the testRepo's butler with the one with the "foo" run. self.testRepo.butler = butler diff --git a/tests/test_cliCmdRemoveCollections.py b/tests/test_cliCmdRemoveCollections.py index 080e78816e..ec20e316f5 100644 --- a/tests/test_cliCmdRemoveCollections.py +++ b/tests/test_cliCmdRemoveCollections.py @@ -220,7 +220,7 @@ def testRemoveCmd(self): # verify chained-run-1 was removed: - butler = Butler(self.root) + butler = Butler.from_config(self.root) collections = butler.registry.queryCollections( collectionTypes=frozenset( ( diff --git a/tests/test_logFormatter.py b/tests/test_logFormatter.py index 8f3d0a4d1d..a166ebadfc 100644 --- a/tests/test_logFormatter.py +++ b/tests/test_logFormatter.py @@ -49,7 +49,7 @@ def setUp(self): Butler.makeRepo(self.root) self.run = "testrun" - self.butler = Butler(self.root, run=self.run) + self.butler = Butler.from_config(self.root, run=self.run) self.datasetType = DatasetType("test_logs", [], "ButlerLogRecords", universe=self.butler.dimensions) self.butler.registry.registerDatasetType(self.datasetType) diff --git a/tests/test_matplotlibFormatter.py b/tests/test_matplotlibFormatter.py index 8851d095f9..78b5f887d8 100644 --- a/tests/test_matplotlibFormatter.py +++ b/tests/test_matplotlibFormatter.py @@ -65,7 +65,7 @@ def tearDown(self): removeTestTempDir(self.root) def testMatplotlibFormatter(self): - butler = Butler(self.root, run="testrun") + butler = Butler.from_config(self.root, run="testrun") datasetType = DatasetType("test_plot", [], "Plot", universe=butler.dimensions) butler.registry.registerDatasetType(datasetType) # Does not have to be a random image diff --git a/tests/test_packages.py b/tests/test_packages.py index 16b395c93f..1f602304ad 100644 --- a/tests/test_packages.py +++ b/tests/test_packages.py @@ -45,7 +45,7 @@ def setUp(self): """Create a new butler root for each test.""" self.root = makeTestTempDir(TESTDIR) Butler.makeRepo(self.root) - self.butler = Butler(self.root, run="test_run") + self.butler = Butler.from_config(self.root, run="test_run") # No dimensions in dataset type so we don't have to worry about # inserting dimension data or defining data IDs. 
self.datasetType = DatasetType( diff --git a/tests/test_parquet.py b/tests/test_parquet.py index 93753cc1c8..b39a0af407 100644 --- a/tests/test_parquet.py +++ b/tests/test_parquet.py @@ -306,7 +306,9 @@ def setUp(self): self.root = makeTestTempDir(TESTDIR) config = Config(self.configFile) self.run = "test_run" - self.butler = Butler(Butler.makeRepo(self.root, config=config), writeable=True, run=self.run) + self.butler = Butler.from_config( + Butler.makeRepo(self.root, config=config), writeable=True, run=self.run + ) # No dimensions in dataset type so we don't have to worry about # inserting dimension data or defining data IDs. self.datasetType = DatasetType( @@ -726,7 +728,9 @@ def setUp(self): self.root = makeTestTempDir(TESTDIR) config = Config(self.configFile) self.run = "test_run" - self.butler = Butler(Butler.makeRepo(self.root, config=config), writeable=True, run=self.run) + self.butler = Butler.from_config( + Butler.makeRepo(self.root, config=config), writeable=True, run=self.run + ) # No dimensions in dataset type so we don't have to worry about # inserting dimension data or defining data IDs. self.datasetType = DatasetType( @@ -1053,7 +1057,9 @@ def setUp(self): """Create a new butler root for each test.""" self.root = makeTestTempDir(TESTDIR) config = Config(self.configFile) - self.butler = Butler(Butler.makeRepo(self.root, config=config), writeable=True, run="test_run") + self.butler = Butler.from_config( + Butler.makeRepo(self.root, config=config), writeable=True, run="test_run" + ) # No dimensions in dataset type so we don't have to worry about # inserting dimension data or defining data IDs. self.datasetType = DatasetType( @@ -1313,7 +1319,9 @@ def setUp(self): """Create a new butler root for each test.""" self.root = makeTestTempDir(TESTDIR) config = Config(self.configFile) - self.butler = Butler(Butler.makeRepo(self.root, config=config), writeable=True, run="test_run") + self.butler = Butler.from_config( + Butler.makeRepo(self.root, config=config), writeable=True, run="test_run" + ) # No dimensions in dataset type so we don't have to worry about # inserting dimension data or defining data IDs. self.datasetType = DatasetType( @@ -1634,7 +1642,9 @@ def setUp(self): """Create a new butler root for each test.""" self.root = makeTestTempDir(TESTDIR) config = Config(self.configFile) - self.butler = Butler(Butler.makeRepo(self.root, config=config), writeable=True, run="test_run") + self.butler = Butler.from_config( + Butler.makeRepo(self.root, config=config), writeable=True, run="test_run" + ) # No dimensions in dataset type so we don't have to worry about # inserting dimension data or defining data IDs. self.datasetType = DatasetType( @@ -1787,7 +1797,9 @@ def setUp(self): """Create a new butler root for each test.""" self.root = makeTestTempDir(TESTDIR) config = Config(self.configFile) - self.butler = Butler(Butler.makeRepo(self.root, config=config), writeable=True, run="test_run") + self.butler = Butler.from_config( + Butler.makeRepo(self.root, config=config), writeable=True, run="test_run" + ) # No dimensions in dataset type so we don't have to worry about # inserting dimension data or defining data IDs. 
self.datasetType = DatasetType( diff --git a/tests/test_quantumBackedButler.py b/tests/test_quantumBackedButler.py index 1cf801fdf9..423ee7e083 100644 --- a/tests/test_quantumBackedButler.py +++ b/tests/test_quantumBackedButler.py @@ -43,6 +43,7 @@ RegistryConfig, StorageClass, ) +from lsst.daf.butler.direct_butler import DirectButler from lsst.daf.butler.registry import _RegistryFactory from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir from lsst.resources import ResourcePath @@ -62,7 +63,9 @@ def setUp(self) -> None: # Make a butler and import dimension definitions. registryConfig = RegistryConfig(self.config.get("registry")) _RegistryFactory(registryConfig).create_from_config(butlerRoot=self.root) - self.butler = Butler(self.config, writeable=True, run="RUN") + butler = Butler.from_config(self.config, writeable=True, run="RUN") + assert isinstance(butler, DirectButler) + self.butler = butler self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) # make all dataset types diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py index a564e4b08b..483d7d03ef 100644 --- a/tests/test_simpleButler.py +++ b/tests/test_simpleButler.py @@ -79,7 +79,7 @@ def makeButler(self, **kwargs: Any) -> Butler: registryConfig = RegistryConfig(config.get("registry")) _RegistryFactory(registryConfig).create_from_config() - butler = Butler(config, **kwargs) + butler = Butler.from_config(config, **kwargs) DatastoreMock.apply(butler) return butler @@ -549,13 +549,13 @@ def testRegistryDefaults(self): # Initialize a new butler with `imported_g` as its default run. # This should not have a default instrument, because there are two. # Pass run instead of collections; this should set both. - butler2 = Butler(butler=butler, run="imported_g") + butler2 = Butler.from_config(butler=butler, run="imported_g") self.assertEqual(list(butler2.registry.defaults.collections), ["imported_g"]) self.assertEqual(butler2.registry.defaults.run, "imported_g") self.assertFalse(butler2.registry.defaults.dataId) # Initialize a new butler with an instrument default explicitly given. # Set collections instead of run, which should then be None. - butler3 = Butler(butler=butler, collections=["imported_g"], instrument="Cam2") + butler3 = Butler.from_config(butler=butler, collections=["imported_g"], instrument="Cam2") self.assertEqual(list(butler3.registry.defaults.collections), ["imported_g"]) self.assertIsNone(butler3.registry.defaults.run, None) self.assertEqual(butler3.registry.defaults.dataId.byName(), {"instrument": "Cam2"}) diff --git a/tests/test_testRepo.py b/tests/test_testRepo.py index faf9518291..71f40e7e6f 100644 --- a/tests/test_testRepo.py +++ b/tests/test_testRepo.py @@ -211,7 +211,7 @@ def testRegisterMetricsExampleChained(self): ] repo = lsst.daf.butler.Butler.makeRepo(temp, config=config) - butler = lsst.daf.butler.Butler(repo, run="chainedExample") + butler = lsst.daf.butler.Butler.from_config(repo, run="chainedExample") registerMetricsExample(butler) addDatasetType(butler, "DummyType", {}, "StructuredDataNoComponents") From 551b1c096feffed3005866061d15bc46ee8a9e16 Mon Sep 17 00:00:00 2001 From: Andy Salnikov Date: Tue, 17 Oct 2023 16:16:35 -0700 Subject: [PATCH 3/5] Make some parameters to Butler() explicit and document them. 
Moved examples section from `DirectButler` docstring to `Butler.from_config` --- python/lsst/daf/butler/_butler.py | 158 ++++++++++++++++++++++-- python/lsst/daf/butler/direct_butler.py | 39 ------ 2 files changed, 148 insertions(+), 49 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index 7510aa65b7..c928684548 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -69,8 +69,37 @@ class Butler(LimitedButler): values will be used. If ``config`` contains "cls" key then its value is used as a name of butler class and it must be a sub-class of this class, otherwise `DirectButler` is instantiated. + collections : `str` or `~collections.abc.Iterable` [ `str` ], optional + An expression specifying the collections to be searched (in order) when + reading datasets. + This may be a `str` collection name or an iterable thereof. + See :ref:`daf_butler_collection_expressions` for more information. + These collections are not registered automatically and must be + manually registered before they are used by any method, but they may be + manually registered after the `Butler` is initialized. + run : `str`, optional + Name of the `~CollectionType.RUN` collection new datasets should be + inserted into. If ``collections`` is `None` and ``run`` is not `None`, + ``collections`` will be set to ``[run]``. If not `None`, this + collection will automatically be registered. If this is not set (and + ``writeable`` is not set either), a read-only butler will be created. + searchPaths : `list` of `str`, optional + Directory paths to search when calculating the full Butler + configuration. Not used if the supplied config is already a + `ButlerConfig`. + writeable : `bool`, optional + Explicitly sets whether the butler supports write operations. If not + provided, a read-write butler is created if any of ``run``, ``tags``, + or ``chains`` is non-empty. + inferDefaults : `bool`, optional + If `True` (default) infer default data ID values from the values + present in the datasets in ``collections``: if all collections have the + same value (or no value) for a governor dimension, that value will be + the default for that dimension. Nonexistent collections are ignored. + If a default value is provided explicitly for a governor dimension via + ``**kwargs``, no default will be inferred for that dimension. **kwargs : `Any` - Optional keyword arguments passed to a constructor of actual butler + Additional keyword arguments passed to a constructor of actual butler class. Notes @@ -80,23 +109,34 @@ class Butler(LimitedButler): but ``mypy`` will complain about the former. """ - def __new__(cls, config: Config | ResourcePathExpression | None = None, **kwargs: Any) -> Butler: + def __new__( + cls, + config: Config | ResourcePathExpression | None = None, + *, + collections: Any = None, + run: str | None = None, + searchPaths: Sequence[ResourcePathExpression] | None = None, + writeable: bool | None = None, + inferDefaults: bool = True, + **kwargs: Any, + ) -> Butler: if cls is Butler: - cls = cls._find_butler_class(config, **kwargs) + cls = cls._find_butler_class(config, searchPaths) # Note: we do not pass any parameters to __new__, Python will pass them # to __init__ after __new__ returns sub-class instance. 
return super().__new__(cls) @staticmethod def _find_butler_class( - config: Config | ResourcePathExpression | None = None, **kwargs: Any + config: Config | ResourcePathExpression | None = None, + searchPaths: Sequence[ResourcePathExpression] | None = None, ) -> type[Butler]: """Find actual class to instantiate.""" butler_class_name: str | None = None if config is not None: # Check for optional "cls" key in config. if not isinstance(config, Config): - config = ButlerConfig(config, searchPaths=kwargs.get("searchPaths")) + config = ButlerConfig(config, searchPaths=searchPaths) butler_class_name = config.get("cls") # Make DirectButler if class is not specified. @@ -112,7 +152,17 @@ def _find_butler_class( return butler_class @classmethod - def from_config(cls, config: Config | ResourcePathExpression | None = None, **kwargs: Any) -> Butler: + def from_config( + cls, + config: Config | ResourcePathExpression | None = None, + *, + collections: Any = None, + run: str | None = None, + searchPaths: Sequence[ResourcePathExpression] | None = None, + writeable: bool | None = None, + inferDefaults: bool = True, + **kwargs: Any, + ) -> Butler: """Create butler instance from configuration. Parameters @@ -124,18 +174,106 @@ def from_config(cls, config: Config | ResourcePathExpression | None = None, **kw given default values will be used. If ``config`` contains "cls" key then its value is used as a name of butler class and it must be a sub-class of this class, otherwise `DirectButler` is instantiated. + collections : `str` or `~collections.abc.Iterable` [ `str` ], optional + An expression specifying the collections to be searched (in order) + when reading datasets. + This may be a `str` collection name or an iterable thereof. + See :ref:`daf_butler_collection_expressions` for more information. + These collections are not registered automatically and must be + manually registered before they are used by any method, but they + may be manually registered after the `Butler` is initialized. + run : `str`, optional + Name of the `~CollectionType.RUN` collection new datasets should be + inserted into. If ``collections`` is `None` and ``run`` is not + `None`, ``collections`` will be set to ``[run]``. If not `None`, + this collection will automatically be registered. If this is not + set (and ``writeable`` is not set either), a read-only butler will + be created. + searchPaths : `list` of `str`, optional + Directory paths to search when calculating the full Butler + configuration. Not used if the supplied config is already a + `ButlerConfig`. + writeable : `bool`, optional + Explicitly sets whether the butler supports write operations. If + not provided, a read-write butler is created if any of ``run``, + ``tags``, or ``chains`` is non-empty. + inferDefaults : `bool`, optional + If `True` (default) infer default data ID values from the values + present in the datasets in ``collections``: if all collections have + the same value (or no value) for a governor dimension, that value + will be the default for that dimension. Nonexistent collections + are ignored. If a default value is provided explicitly for a + governor dimension via ``**kwargs``, no default will be inferred + for that dimension. **kwargs : `Any` - Optional keyword arguments passed to a constructor of actual butler - class. + Additional keyword arguments passed to a constructor of actual + butler class. Notes ----- Calling this factory method is identical to calling ``Butler(config, ...)``. 
Its only raison d'ĂȘtre is that ``mypy`` complains about ``Butler()`` call. + + Examples + -------- + While there are many ways to control exactly how a `Butler` interacts + with the collections in its `Registry`, the most common cases are still + simple. + + For a read-only `Butler` that searches one collection, do:: + + butler = Butler.from_config( + "/path/to/repo", collections=["u/alice/DM-50000"] + ) + + For a read-write `Butler` that writes to and reads from a + `~CollectionType.RUN` collection:: + + butler = Butler.from_config( + "/path/to/repo", run="u/alice/DM-50000/a" + ) + + The `Butler` passed to a ``PipelineTask`` is often much more complex, + because we want to write to one `~CollectionType.RUN` collection but + read from several others (as well):: + + butler = Butler.from_config( + "/path/to/repo", + run="u/alice/DM-50000/a", + collections=[ + "u/alice/DM-50000/a", "u/bob/DM-49998", "HSC/defaults" + ] + ) + + This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. + Datasets will be read first from that run (since it appears first in + the chain), and then from ``u/bob/DM-49998`` and finally + ``HSC/defaults``. + + Finally, one can always create a `Butler` with no collections:: + + butler = Butler.from_config("/path/to/repo", writeable=True) + + This can be extremely useful when you just want to use + ``butler.registry``, e.g. for inserting dimension data or managing + collections, or when the collections you want to use with the butler + are not consistent. Passing ``writeable`` explicitly here is only + necessary if you want to be able to make changes to the repo - usually + the value for ``writeable`` can be guessed from the collection + arguments provided, but it defaults to `False` when there are not + collection arguments. """ - cls = cls._find_butler_class(config, **kwargs) - return cls(config, **kwargs) + cls = cls._find_butler_class(config, searchPaths) + return cls( + config, + collections=collections, + run=run, + searchPaths=searchPaths, + writeable=writeable, + inferDefaults=inferDefaults, + **kwargs, + ) @staticmethod def makeRepo( diff --git a/python/lsst/daf/butler/direct_butler.py b/python/lsst/daf/butler/direct_butler.py index 9572d80762..76be5e0b3b 100644 --- a/python/lsst/daf/butler/direct_butler.py +++ b/python/lsst/daf/butler/direct_butler.py @@ -153,45 +153,6 @@ class DirectButler(Butler): **kwargs : `str` Default data ID key-value pairs. These may only identify "governor" dimensions like ``instrument`` and ``skymap``. - - Examples - -------- - While there are many ways to control exactly how a `Butler` interacts with - the collections in its `Registry`, the most common cases are still simple. - - For a read-only `Butler` that searches one collection, do:: - - butler = Butler("/path/to/repo", collections=["u/alice/DM-50000"]) - - For a read-write `Butler` that writes to and reads from a - `~CollectionType.RUN` collection:: - - butler = Butler("/path/to/repo", run="u/alice/DM-50000/a") - - The `Butler` passed to a ``PipelineTask`` is often much more complex, - because we want to write to one `~CollectionType.RUN` collection but read - from several others (as well):: - - butler = Butler("/path/to/repo", run="u/alice/DM-50000/a", - collections=["u/alice/DM-50000/a", - "u/bob/DM-49998", - "HSC/defaults"]) - - This butler will `put` new datasets to the run ``u/alice/DM-50000/a``. - Datasets will be read first from that run (since it appears first in the - chain), and then from ``u/bob/DM-49998`` and finally ``HSC/defaults``. 
- - Finally, one can always create a `Butler` with no collections:: - - butler = Butler("/path/to/repo", writeable=True) - - This can be extremely useful when you just want to use ``butler.registry``, - e.g. for inserting dimension data or managing collections, or when the - collections you want to use with the butler are not consistent. - Passing ``writeable`` explicitly here is only necessary if you want to be - able to make changes to the repo - usually the value for ``writeable`` can - be guessed from the collection arguments provided, but it defaults to - `False` when there are not collection arguments. """ def __init__( From ea5087a62b79f444262d315ee2afe613dce515b0 Mon Sep 17 00:00:00 2001 From: Andy Salnikov Date: Tue, 17 Oct 2023 15:53:24 -0700 Subject: [PATCH 4/5] Update logging usage in Butler classes --- python/lsst/daf/butler/_butler.py | 4 +- python/lsst/daf/butler/direct_butler.py | 63 +++++++++++++------------ 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index c928684548..a01541f9ad 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -54,7 +54,7 @@ from .repo_relocation import BUTLER_ROOT_TAG from .transfers import RepoExportContext -log = getLogger(__name__) +_LOG = getLogger(__name__) class Butler(LimitedButler): @@ -427,7 +427,7 @@ def makeRepo( dimensionConfig=dimensionConfig, butlerRoot=root_uri ) - log.verbose("Wrote new Butler configuration file to %s", configURI) + _LOG.verbose("Wrote new Butler configuration file to %s", configURI) return config diff --git a/python/lsst/daf/butler/direct_butler.py b/python/lsst/daf/butler/direct_butler.py index 76be5e0b3b..68619848fd 100644 --- a/python/lsst/daf/butler/direct_butler.py +++ b/python/lsst/daf/butler/direct_butler.py @@ -94,7 +94,7 @@ from .transfers import RepoImportBackend -log = getLogger(__name__) +_LOG = getLogger(__name__) class ButlerValidationError(ValidationError): @@ -203,7 +203,7 @@ def __init__( except Exception: # Failures here usually mean that configuration is incomplete, # just issue an error message which includes config file URI. - log.error(f"Failed to instantiate Butler from config {self._config.configFile}.") + _LOG.error(f"Failed to instantiate Butler from config {self._config.configFile}.") raise # For execution butler the datastore needs a special @@ -496,7 +496,7 @@ def _rewrite_data_id( if isinstance(value, alternate.getPythonType()): byRecord[dimensionName][alternate.name] = value del dataIdDict[dimensionName] - log.debug( + _LOG.debug( "Converting dimension %s to %s.%s=%s", dimensionName, dimensionName, @@ -505,7 +505,7 @@ def _rewrite_data_id( ) break else: - log.warning( + _LOG.warning( "Type mismatch found for value '%r' provided for dimension %s. " "Could not find matching alternative (primary key has type %s) " "so attempting to use as-is.", @@ -519,7 +519,7 @@ def _rewrite_data_id( # are dimensions in both (rather than calling update). for k, v in kwargs.items(): if k in newDataId and newDataId[k] != v: - log.debug( + _LOG.debug( "Keyword arg %s overriding explicit value in dataId of %s with %s", k, newDataId[k], v ) newDataId[k] = v @@ -617,7 +617,7 @@ def _rewrite_data_id( # Returns a list of tuples selected = duplicatesCounter.most_common(1)[0][0] - log.debug( + _LOG.debug( "Ambiguous dataId entry '%s' associated with multiple dimensions: %s." 
" Removed ambiguity by choosing dimension %s.", fieldName, @@ -632,7 +632,9 @@ def _rewrite_data_id( # Update the record look up dict with the new associations for dimensionName, values in guessedAssociation.items(): if values: # A dict might now be empty - log.debug("Assigned non-dimension dataId keys to dimension %s: %s", dimensionName, values) + _LOG.debug( + "Assigned non-dimension dataId keys to dimension %s: %s", dimensionName, values + ) byRecord[dimensionName].update(values) if byRecord: @@ -640,7 +642,7 @@ def _rewrite_data_id( # them to the Id form for dimensionName, values in byRecord.items(): if dimensionName in newDataId: - log.debug( + _LOG.debug( "DataId specified explicit %s dimension value of %s in addition to" " general record specifiers for it of %s. Ignoring record information.", dimensionName, @@ -727,9 +729,11 @@ def _rewrite_data_id( # The ambiguity may have been resolved so check again. if len(records) > 1: - log.debug("Received %d records from constraints of %s", len(records), str(values)) + _LOG.debug( + "Received %d records from constraints of %s", len(records), str(values) + ) for r in records: - log.debug("- %s", str(r)) + _LOG.debug("- %s", str(r)) raise ValueError( f"DataId specification for dimension {dimensionName} is not" f" uniquely constrained to a single dataset by {values}." @@ -935,7 +939,7 @@ def put( """ if isinstance(datasetRefOrType, DatasetRef): # This is a direct put of predefined DatasetRef. - log.debug("Butler put direct: %s", datasetRefOrType) + _LOG.debug("Butler put direct: %s", datasetRefOrType) if run is not None: warnings.warn("Run collection is not used for DatasetRef", stacklevel=3) # If registry already has a dataset with the same dataset ID, @@ -956,7 +960,7 @@ def put( raise ConflictingDefinitionError(f"Datastore already contains dataset: {e}") from e return datasetRefOrType - log.debug("Butler put: %s, dataId=%s, run=%s", datasetRefOrType, dataId, run) + _LOG.debug("Butler put: %s, dataId=%s, run=%s", datasetRefOrType, dataId, run) if not self.isWriteable(): raise TypeError("Butler is read-only.") datasetType, dataId = self._standardizeArgs(datasetRefOrType, dataId, **kwargs) @@ -1187,7 +1191,7 @@ def get( fetched with a ``{instrument, detector, exposure}`` data ID, because ``exposure`` is a temporal dimension. """ - log.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters) + _LOG.debug("Butler get: %s, dataId=%s, parameters=%s", datasetRefOrType, dataId, parameters) ref = self._findDatasetRef( datasetRefOrType, dataId, collections=collections, datastore_records=True, **kwargs ) @@ -1584,7 +1588,7 @@ def ingest( if not self.isWriteable(): raise TypeError("Butler is read-only.") - log.verbose("Ingesting %d file dataset%s.", len(datasets), "" if len(datasets) == 1 else "s") + _LOG.verbose("Ingesting %d file dataset%s.", len(datasets), "" if len(datasets) == 1 else "s") if not datasets: return @@ -1664,7 +1668,7 @@ def ingest( refs_to_import.extend(dataset.refs) n_refs = len(refs_to_import) - log.verbose( + _LOG.verbose( "Importing %d ref%s of dataset type %r into run %r", n_refs, "" if n_refs == 1 else "s", @@ -1767,7 +1771,7 @@ def import_( exists_in_cwd = filename.exists() exists_in_dir = potential.exists() if exists_in_cwd and exists_in_dir: - log.warning( + _LOG.warning( "A relative path for filename was specified (%s) which exists relative to cwd. " "Additionally, the file exists relative to the given search directory (%s). 
" "Using the export file in the given directory.", @@ -1828,7 +1832,7 @@ def transfer_from( source_refs = list(source_refs) original_count = len(source_refs) - log.info("Transferring %d datasets into %s", original_count, str(self)) + _LOG.info("Transferring %d datasets into %s", original_count, str(self)) # In some situations the datastore artifact may be missing # and we do not want that registry entry to be imported. @@ -1844,7 +1848,7 @@ def transfer_from( source_refs = [ref for ref, exists in dataset_existence.items() if exists] filtered_count = len(source_refs) n_missing = original_count - filtered_count - log.verbose( + _LOG.verbose( "%d dataset%s removed because the artifact does not exist. Now have %d.", n_missing, "" if n_missing == 1 else "s", @@ -1883,13 +1887,12 @@ def transfer_from( if newly_registered_dataset_types: # We may have registered some even if there were inconsistencies # but should let people know (or else remove them again). - log.log( - VERBOSE, + _LOG.verbose( "Registered the following dataset types in the target Butler: %s", ", ".join(d.name for d in newly_registered_dataset_types), ) else: - log.log(VERBOSE, "All required dataset types are known to the target Butler") + _LOG.verbose("All required dataset types are known to the target Butler") dimension_records: dict[DimensionElement, dict[DataCoordinate, DimensionRecord]] = defaultdict(dict) if transfer_dimensions: @@ -1922,7 +1925,7 @@ def transfer_from( # Do all the importing in a single transaction. with self.transaction(): if dimension_records: - log.verbose("Ensuring that dimension records exist for transferred datasets.") + _LOG.verbose("Ensuring that dimension records exist for transferred datasets.") for element, r in dimension_records.items(): records = [r[dataId] for dataId in r] # Assume that if the record is already present that we can @@ -1943,10 +1946,10 @@ def transfer_from( registered = self._registry.registerRun(run, doc=run_doc) handled_collections.add(run) if registered: - log.log(VERBOSE, "Creating output run %s", run) + _LOG.verbose("Creating output run %s", run) n_refs = len(refs_to_import) - log.verbose( + _LOG.verbose( "Importing %d ref%s of dataset type %s into run %s", n_refs, "" if n_refs == 1 else "s", @@ -1961,7 +1964,7 @@ def transfer_from( n_imported += len(imported_refs) assert len(source_refs) == n_imported - log.verbose("Imported %d datasets into destination butler", n_imported) + _LOG.verbose("Imported %d datasets into destination butler", n_imported) # Ask the datastore to transfer. The datastore has to check that # the source datastore is compatible with the target datastore. @@ -1973,7 +1976,7 @@ def transfer_from( ) if rejected: # For now, accept the registry entries but not the files. 
- log.warning( + _LOG.warning( "%d datasets were rejected and %d accepted for dataset type %s in run %r.", len(rejected), len(accepted), @@ -2064,7 +2067,9 @@ def validateConfiguration( self._registry.getDatasetType(key.name) except KeyError: if logFailures: - log.critical("Key '%s' does not correspond to a DatasetType or StorageClass", key) + _LOG.critical( "Key '%s' does not correspond to a DatasetType or StorageClass", key ) failedNames.add(key) else: # Dimensions are checked for consistency when the Butler @@ -2077,11 +2082,11 @@ def validateConfiguration( dataIdKeys = set(key.dataId) if {"instrument"} != dataIdKeys: if logFailures: - log.critical("Key '%s' has unsupported DataId override", key) + _LOG.critical("Key '%s' has unsupported DataId override", key) failedDataId.add(key) elif key.dataId["instrument"] not in instruments: if logFailures: - log.critical("Key '%s' has unknown instrument", key) + _LOG.critical("Key '%s' has unknown instrument", key) failedDataId.add(key) messages = [] From a5d78c28c528f5b440cdab5aa3e5ed8723e7f2df Mon Sep 17 00:00:00 2001 From: Andy Salnikov Date: Sun, 15 Oct 2023 21:48:52 -0700 Subject: [PATCH 5/5] Add news fragment --- doc/changes/DM-41116.api.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/changes/DM-41116.api.md diff --git a/doc/changes/DM-41116.api.md b/doc/changes/DM-41116.api.md new file mode 100644 index 0000000000..7dbbf2837e --- /dev/null +++ b/doc/changes/DM-41116.api.md @@ -0,0 +1,3 @@ +- The `Butler` class is now an abstract base class; the original `Butler` implementation has been renamed to `DirectButler`. +- Clients that need access to the `DirectButler` class will have to import it from `lsst.daf.butler.direct_butler`. +- `Butler.from_config(...)` should be used to make `Butler` instances. `Butler(...)` still works and is identical to `Butler.from_config(...)`, but it will generate `mypy` errors.
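For reference, a minimal usage sketch of the new entry points described in this news fragment (an editorial illustration, not part of the patch; the repository path and collection name are placeholders taken from the docstring examples, and it assumes the repo configuration does not override the butler ``cls`` key)::

    from lsst.daf.butler import Butler
    from lsst.daf.butler.direct_butler import DirectButler

    # Butler.from_config is the preferred constructor; it returns an instance
    # of a concrete subclass (DirectButler unless the repo config names a
    # different class via its "cls" key).
    butler = Butler.from_config("/path/to/repo", collections=["u/alice/DM-50000"])
    assert isinstance(butler, DirectButler)

    # Butler("/path/to/repo", ...) still returns the same concrete instance,
    # but mypy reports it as instantiation of an abstract class, so new code
    # should prefer the factory method.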