DM-40120: Add without_datastore flag to Butler #867

Merged (8 commits, Jul 21, 2023)

Changes from all commits
2 changes: 2 additions & 0 deletions doc/changes/DM-40120.api.rst
@@ -0,0 +1,2 @@
Added new parameter ``without_datastore`` to the ``Butler`` and ``ButlerConfig`` constructors to allow a butler to be created that cannot access a datastore.
This can be helpful if you want to query the registry without the overhead of creating a datastore.
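As a quick illustration of the intent behind this change, a minimal usage sketch (the repository path is a placeholder and assumes an existing butler repo):

```python
from lsst.daf.butler import Butler

# Registry-only butler: no datastore is constructed, so registry queries
# work but any datastore access (get, put, getURI, ...) will fail.
butler = Butler("/path/to/repo", without_datastore=True)

for name in butler.registry.queryCollections():
    print(name)
```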
16 changes: 12 additions & 4 deletions python/lsst/daf/butler/_butler.py
@@ -69,6 +69,7 @@
DimensionRecord,
DimensionUniverse,
FileDataset,
NullDatastore,
Progress,
StorageClass,
StorageClassFactory,
@@ -149,6 +150,9 @@ class Butler(LimitedButler):
the default for that dimension. Nonexistent collections are ignored.
If a default value is provided explicitly for a governor dimension via
``**kwargs``, no default will be inferred for that dimension.
without_datastore : `bool`, optional
If `True`, do not attach a datastore to this butler. Any attempt
to use a datastore will fail.
**kwargs : `str`
Default data ID key-value pairs. These may only identify "governor"
dimensions like ``instrument`` and ``skymap``.
@@ -203,6 +207,7 @@ def __init__(
searchPaths: Sequence[ResourcePathExpression] | None = None,
writeable: bool | None = None,
inferDefaults: bool = True,
without_datastore: bool = False,
**kwargs: str,
):
defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs)
@@ -217,7 +222,7 @@ def __init__(
self.storageClasses = butler.storageClasses
self._config: ButlerConfig = butler._config
else:
self._config = ButlerConfig(config, searchPaths=searchPaths)
self._config = ButlerConfig(config, searchPaths=searchPaths, without_datastore=without_datastore)
try:
if "root" in self._config:
butlerRoot = self._config["root"]
@@ -228,9 +233,12 @@
self._registry = _RegistryFactory(self._config).from_config(
butlerRoot=butlerRoot, writeable=writeable, defaults=defaults
)
self._datastore = Datastore.fromConfig(
self._config, self._registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot
)
if without_datastore:
self._datastore = NullDatastore(None, None)
else:
self._datastore = Datastore.fromConfig(
self._config, self._registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot
)
self.storageClasses = StorageClassFactory()
self.storageClasses.addFromConfig(self._config)
except Exception:
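The hunk above is a null-object substitution: when ``without_datastore`` is set, an API-compatible stub is attached instead of a real datastore, so downstream code never has to check for ``None``. A self-contained sketch of the same pattern, with hypothetical names (not the daf_butler classes):

```python
from abc import ABC, abstractmethod
from typing import Any


class Store(ABC):
    """Stand-in for a datastore-like interface (hypothetical)."""

    @abstractmethod
    def get(self, key: str) -> Any:
        ...


class DictStore(Store):
    """A trivial working store, for contrast (hypothetical)."""

    def __init__(self) -> None:
        self._data: dict[str, Any] = {"example": 42}

    def get(self, key: str) -> Any:
        return self._data[key]


class NullStore(Store):
    """Implements the interface but refuses every request."""

    def get(self, key: str) -> Any:
        raise FileNotFoundError("no-op store: cannot access real storage")


def make_store(without_store: bool) -> Store:
    # Mirrors the constructor logic above: attach a null object rather than
    # None, so callers never need an existence check.
    return NullStore() if without_store else DictStore()
```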
11 changes: 10 additions & 1 deletion python/lsst/daf/butler/_butlerConfig.py
@@ -61,12 +61,15 @@ class ButlerConfig(Config):
than those read from the environment in
`ConfigSubset.defaultSearchPaths()`. They are only read if ``other``
refers to a configuration file or directory.
without_datastore : `bool`, optional
If `True`, remove the datastore configuration.
"""

def __init__(
self,
other: ResourcePathExpression | Config | None = None,
searchPaths: Sequence[ResourcePathExpression] | None = None,
without_datastore: bool = False,
):
self.configDir: ResourcePath | None = None

@@ -155,6 +158,13 @@ def __init__(
# configuration classes. We ask each of them to apply defaults to
# the values we have been supplied by the user.
for configClass in CONFIG_COMPONENT_CLASSES:
assert configClass.component is not None, "Config class component cannot be None"

if without_datastore and configClass is DatastoreConfig:
if configClass.component in butlerConfig:
del butlerConfig[configClass.component]
continue

# Only send the parent config if the child
# config component is present (otherwise it assumes that the
# keys from other components are part of the child)
@@ -163,7 +173,6 @@ def __init__(
localOverrides = butlerConfig
config = configClass(localOverrides, searchPaths=searchPaths)
# Re-attach it using the global namespace
assert configClass.component is not None, "Config class component cannot be None"
self.update({configClass.component: config})
# Remove the key from the butlerConfig since we have already
# merged that information.
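A hedged sketch of the resulting configuration behaviour (the repository path is a placeholder and assumes a butler repo already exists there):

```python
from lsst.daf.butler import ButlerConfig

config = ButlerConfig("/path/to/repo", without_datastore=True)

# The datastore component was deleted before the per-component defaults
# were merged, so the assembled configuration should not contain it.
assert "datastore" not in config
```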
124 changes: 123 additions & 1 deletion python/lsst/daf/butler/core/datastore.py
@@ -23,11 +23,12 @@

from __future__ import annotations

__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs")
__all__ = ("DatastoreConfig", "Datastore", "DatastoreValidationError", "DatasetRefURIs", "NullDatastore")

import contextlib
import dataclasses
import logging
import time
from abc import ABCMeta, abstractmethod
from collections import abc, defaultdict
from collections.abc import Callable, Iterable, Iterator, Mapping
@@ -50,6 +51,8 @@
from .datastoreRecordData import DatastoreRecordData
from .storageClass import StorageClass

_LOG = logging.getLogger(__name__)


class DatastoreConfig(ConfigSubset):
"""Configuration for Datastores."""
@@ -1205,3 +1208,122 @@
guess dataset location based on its stored dataset type.
"""
pass


class NullDatastore(Datastore):
"""A datastore that implements the `Datastore` API but always fails when
it accepts any request.
"""

@classmethod
def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
# Nothing to do. This is not a real Datastore.
pass

[Codecov warning: added line python/lsst/daf/butler/core/datastore.py#L1221 was not covered by tests]

def __init__(
self,
config: Config | ResourcePathExpression | None,
bridgeManager: DatastoreRegistryBridgeManager | None,
butlerRoot: ResourcePathExpression | None = None,
):
# Name ourselves with the timestamp the datastore
# was created.
self.name = f"{type(self).__name__}@{time.time()}"
[Review comment, Member]
Why do we need the timestamp? Two NullDatastore instances should be identical (it could even be a singleton, though I don't think it needs to be), and I'd hope that means we don't need to care about conflicts.

[Reply, Member Author]
Good point, although since this datastore should never result in a dataset being stored anywhere, there's no requirement for it to be a fixed name. The time is really there to let you know when you created that butler, and it will appear in the stringified output of Butler. Whether anyone cares is a question.

_LOG.debug("Creating datastore %s", self.name)

return

def knows(self, ref: DatasetRef) -> bool:
return False

def exists(self, datasetRef: DatasetRef) -> bool:
return False

def get(
self,
datasetRef: DatasetRef,
parameters: Mapping[str, Any] | None = None,
storageClass: StorageClass | str | None = None,
) -> Any:
raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")

def put(self, inMemoryDataset: Any, datasetRef: DatasetRef) -> None:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def ingest(
self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
) -> None:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def transfer_from(
self,
source_datastore: Datastore,
refs: Iterable[DatasetRef],
transfer: str = "auto",
artifact_existence: dict[ResourcePath, bool] | None = None,
) -> tuple[set[DatasetRef], set[DatasetRef]]:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")

def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
raise FileNotFoundError("This is a no-op datastore that can not access a real datastore")

def retrieveArtifacts(
self,
refs: Iterable[DatasetRef],
destination: ResourcePath,
transfer: str = "auto",
preserve_path: bool = True,
overwrite: bool = False,
) -> list[ResourcePath]:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def remove(self, datasetRef: DatasetRef) -> None:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def forget(self, refs: Iterable[DatasetRef]) -> None:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def emptyTrash(self, ignore_errors: bool = True) -> None:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def export(
self,
refs: Iterable[DatasetRef],
*,
directory: ResourcePathExpression | None = None,
transfer: str | None = "auto",
) -> Iterable[FileDataset]:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def validateConfiguration(
self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
) -> None:
# No configuration so always validates.
pass

def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
pass

[Codecov warning: added line python/lsst/daf/butler/core/datastore.py#L1314 was not covered by tests]

def getLookupKeys(self) -> set[LookupKey]:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def import_records(
self,
data: Mapping[str, DatastoreRecordData],
) -> None:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")

def export_records(
self,
refs: Iterable[DatasetIdRef],
) -> Mapping[str, DatastoreRecordData]:
raise NotImplementedError("This is a no-op datastore that can not access a real datastore")
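A small check of the new class in isolation, matching the methods above (no repository is needed to instantiate it):

```python
from lsst.daf.butler import NullDatastore

store = NullDatastore(None, None)
print(store.name)  # NullDatastore@<creation timestamp>

# Mutating and lookup calls raise NotImplementedError; read-style calls
# such as get() and getURI() raise FileNotFoundError instead.
try:
    store.getLookupKeys()
except NotImplementedError as err:
    print(f"expected: {err}")
```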
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/certifyCalibrations.py
@@ -63,7 +63,7 @@
Search all children of the inputCollection if it is a CHAINED
collection, instead of just the most recent one.
"""
butler = Butler(repo, writeable=True)
butler = Butler(repo, writeable=True, without_datastore=True)

[Codecov warning: added line python/lsst/daf/butler/script/certifyCalibrations.py#L66 was not covered by tests]
registry = butler.registry
timespan = Timespan(
begin=astropy.time.Time(begin_date, scale="tai") if begin_date is not None else None,
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/collectionChain.py
@@ -65,7 +65,7 @@ def collectionChain(
chain : `tuple` of `str`
The collections in the chain following this command.
"""
butler = Butler(repo, writeable=True)
butler = Butler(repo, writeable=True, without_datastore=True)

# Every mode needs children except pop.
if not children and mode != "pop":
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/queryCollections.py
@@ -134,7 +134,7 @@ def _getTree(
names=("Name", "Type"),
dtype=(str, str),
)
butler = Butler(repo)
butler = Butler(repo, without_datastore=True)

def addCollection(name: str, level: int = 0) -> None:
collectionType = butler.registry.getCollectionType(name)
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/queryDataIds.py
@@ -103,7 +103,7 @@ def queryDataIds(
Docstring for supported parameters is the same as
`~lsst.daf.butler.Registry.queryDataIds`.
"""
butler = Butler(repo)
butler = Butler(repo, without_datastore=True)

if datasets and collections and not dimensions:
# Determine the dimensions relevant to all given dataset types.
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/queryDatasetTypes.py
@@ -55,7 +55,7 @@ def queryDatasetTypes(repo: str, verbose: bool, glob: Iterable[str], components:
A dict whose key is "datasetTypes" and whose value is a list of
collection names.
"""
butler = Butler(repo)
butler = Butler(repo, without_datastore=True)
expression = glob if glob else ...
datasetTypes = butler.registry.queryDatasetTypes(components=components, expression=expression)
if verbose:
4 changes: 3 additions & 1 deletion python/lsst/daf/butler/script/queryDatasets.py
@@ -167,7 +167,9 @@ def __init__(
):
if (repo and butler) or (not repo and not butler):
raise RuntimeError("One of repo and butler must be provided and the other must be None.")
self.butler = butler or Butler(repo)
# show_uri requires a datastore.
without_datastore = not show_uri
self.butler = butler or Butler(repo, without_datastore=without_datastore)
self._getDatasets(glob, collections, where, find_first)
self.showUri = show_uri

2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/queryDimensionRecords.py
@@ -48,7 +48,7 @@ def queryDimensionRecords(
`~lsst.daf.butler.Registry.queryDimensionRecords` except for ``no_check``,
which is the inverse of ``check``.
"""
butler = Butler(repo)
butler = Butler(repo, without_datastore=True)

query_collections: Iterable[str] | EllipsisType | None = None
if datasets:
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/register_dataset_type.py
@@ -63,7 +63,7 @@ def register_dataset_type(
be created by this command. They are always derived from the composite
dataset type.
"""
butler = Butler(repo, writeable=True)
butler = Butler(repo, writeable=True, without_datastore=True)

composite, component = DatasetType.splitDatasetTypeName(dataset_type)
if component:
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/script/removeDatasetType.py
@@ -37,5 +37,5 @@ def removeDatasetType(repo: str, dataset_type_name: tuple[str, ...]) -> None:
datasetTypeName : `str`
The name of the dataset type to be removed.
"""
butler = Butler(repo, writeable=True)
butler = Butler(repo, writeable=True, without_datastore=True)
butler.registry.removeDatasetType(dataset_type_name)
51 changes: 51 additions & 0 deletions tests/test_butler.py
@@ -76,6 +76,7 @@ def mock_s3(*args: Any, **kwargs: Any) -> Any: # type: ignore[no-untyped-def]
FileDataset,
FileTemplate,
FileTemplateValidationError,
NullDatastore,
StorageClassFactory,
ValidationError,
script,
@@ -2332,6 +2333,56 @@ class ChainedDatastoreTransfers(PosixDatastoreTransfers):
configFile = os.path.join(TESTDIR, "config/basic/butler-chained.yaml")


class NullDatastoreTestCase(unittest.TestCase):
"""Test that we can fall back to a null datastore."""

# Need a good config to create the repo.
configFile = os.path.join(TESTDIR, "config/basic/butler.yaml")

@classmethod
def setUpClass(cls) -> None:
cls.storageClassFactory = StorageClassFactory()
cls.storageClassFactory.addFromConfig(cls.configFile)

def setUp(self) -> None:
"""Create a new butler root for each test."""
self.root = makeTestTempDir(TESTDIR)
Butler.makeRepo(self.root, config=Config(self.configFile))

def tearDown(self) -> None:
removeTestTempDir(self.root)

def test_fallback(self) -> None:
# Read the butler config and mess with the datastore section.
bad_config = Config(os.path.join(self.root, "butler.yaml"))
bad_config["datastore", "cls"] = "lsst.not.a.datastore.Datastore"

with self.assertRaises(RuntimeError):
Butler(bad_config)

butler = Butler(bad_config, writeable=True, without_datastore=True)
self.assertIsInstance(butler._datastore, NullDatastore)

# Check that registry is working.
butler.registry.registerRun("MYRUN")
collections = butler.registry.queryCollections(...)
self.assertIn("MYRUN", set(collections))

# Create a ref.
dimensions = butler.dimensions.extract([])
storageClass = self.storageClassFactory.getStorageClass("StructuredDataDict")
datasetTypeName = "metric"
datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
butler.registry.registerDatasetType(datasetType)
ref = DatasetRef(datasetType, {}, run="MYRUN")

# Check that datastore will complain.
with self.assertRaises(FileNotFoundError):
butler.get(ref)
with self.assertRaises(FileNotFoundError):
butler.getURI(ref)


def setup_module(module: types.ModuleType) -> None:
"""Set up the module for pytest."""
clean_environment()