From a305ef360399d1807a1b768d287d8832e292455f Mon Sep 17 00:00:00 2001 From: Andy Salnikov Date: Tue, 11 Jul 2023 19:38:37 -0700 Subject: [PATCH] Move registry factory methods to a separate class. Trying to further separate concerns into independent classes. Re-introduced `Registry.createFromConfig` to avoid updating tests in many other packages. --- python/lsst/daf/butler/_butler.py | 7 +- python/lsst/daf/butler/registry/__init__.py | 1 + .../daf/butler/registry/_butler_registry.py | 41 +--- python/lsst/daf/butler/registry/_registry.py | 43 ++++ .../daf/butler/registry/_registry_factory.py | 185 ++++++++++++++++++ tests/test_dimensions.py | 4 +- tests/test_obscore.py | 4 +- tests/test_postgresql.py | 6 +- tests/test_quantumBackedButler.py | 4 +- tests/test_query_relations.py | 4 +- tests/test_simpleButler.py | 4 +- tests/test_sqlite.py | 10 +- 12 files changed, 256 insertions(+), 57 deletions(-) create mode 100644 python/lsst/daf/butler/registry/_registry_factory.py diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index 87b3f6f964..22ccb944d8 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -87,6 +87,7 @@ Registry, RegistryConfig, RegistryDefaults, + RegistryFactory, ) from .transfers import RepoExportContext @@ -224,7 +225,7 @@ def __init__( butlerRoot = self._config.configDir if writeable is None: writeable = run is not None - self._registry = ButlerRegistry.fromConfig( + self._registry = RegistryFactory.from_config( self._config, butlerRoot=butlerRoot, writeable=writeable, defaults=defaults ) self._datastore = Datastore.fromConfig( @@ -462,7 +463,9 @@ def makeRepo( # Create Registry and populate tables registryConfig = RegistryConfig(config.get("registry")) dimensionConfig = DimensionConfig(dimensionConfig) - ButlerRegistry.createFromConfig(registryConfig, dimensionConfig=dimensionConfig, butlerRoot=root_uri) + RegistryFactory.create_from_config( + registryConfig, dimensionConfig=dimensionConfig, butlerRoot=root_uri + ) log.verbose("Wrote new Butler configuration file to %s", configURI) diff --git a/python/lsst/daf/butler/registry/__init__.py b/python/lsst/daf/butler/registry/__init__.py index 2a346aa3a3..6151e93c24 100644 --- a/python/lsst/daf/butler/registry/__init__.py +++ b/python/lsst/daf/butler/registry/__init__.py @@ -28,6 +28,7 @@ from ._defaults import * from ._exceptions import * from ._registry import * +from ._registry_factory import * from .wildcards import CollectionSearch # Some modules intentionally not imported, either because they are purely diff --git a/python/lsst/daf/butler/registry/_butler_registry.py b/python/lsst/daf/butler/registry/_butler_registry.py index 7406f7ea61..8a6d3bb633 100644 --- a/python/lsst/daf/butler/registry/_butler_registry.py +++ b/python/lsst/daf/butler/registry/_butler_registry.py @@ -27,7 +27,6 @@ from typing import TYPE_CHECKING from lsst.resources import ResourcePathExpression -from lsst.utils import doImportType from ..core import Config, DimensionConfig from ._config import RegistryConfig @@ -79,38 +78,7 @@ def forceRegistryConfig( return config @classmethod - def determineTrampoline( - cls, config: ButlerConfig | RegistryConfig | Config | str | None - ) -> tuple[type[ButlerRegistry], RegistryConfig]: - """Return class to use to instantiate real registry. - - Parameters - ---------- - config : `RegistryConfig` or `str`, optional - Registry configuration, if missing then default configuration will - be loaded from registry.yaml. - - Returns - ------- - requested_cls : `type` of `ButlerRegistry` - The real registry class to use. - registry_config : `RegistryConfig` - The `RegistryConfig` to use. - """ - config = cls.forceRegistryConfig(config) - - # Default to the standard registry - registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry") - registry_cls = doImportType(registry_cls_name) - if registry_cls is cls: - raise ValueError("Can not instantiate the abstract base Registry from config") - if not issubclass(registry_cls, ButlerRegistry): - raise TypeError( - f"Registry class obtained from config {registry_cls_name} is not a ButlerRegistry class." - ) - return registry_cls, config - - @classmethod + @abstractmethod def createFromConfig( cls, config: RegistryConfig | str | None = None, @@ -144,10 +112,10 @@ def createFromConfig( use from configuration. Each subclass should implement this method even if it can not create a registry. """ - registry_cls, registry_config = cls.determineTrampoline(config) - return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot) + raise NotImplementedError() @classmethod + @abstractmethod def fromConfig( cls, config: ButlerConfig | RegistryConfig | Config | str, @@ -185,8 +153,7 @@ def fromConfig( # subclass. No implementation should ever use this implementation # directly. If no class is specified, default to the standard # registry. - registry_cls, registry_config = cls.determineTrampoline(config) - return registry_cls.fromConfig(config, butlerRoot, writeable, defaults) + raise NotImplementedError() @abstractmethod def copy(self, defaults: RegistryDefaults | None = None) -> ButlerRegistry: diff --git a/python/lsst/daf/butler/registry/_registry.py b/python/lsst/daf/butler/registry/_registry.py index 7af253b62e..81b45dae4a 100644 --- a/python/lsst/daf/butler/registry/_registry.py +++ b/python/lsst/daf/butler/registry/_registry.py @@ -31,6 +31,8 @@ from types import EllipsisType from typing import TYPE_CHECKING, Any +from lsst.resources import ResourcePathExpression + from ..core import ( DataCoordinate, DataId, @@ -40,6 +42,7 @@ DatasetRef, DatasetType, Dimension, + DimensionConfig, DimensionElement, DimensionGraph, DimensionRecord, @@ -50,6 +53,7 @@ ) from ._collection_summary import CollectionSummary from ._collectionType import CollectionType +from ._config import RegistryConfig from ._defaults import RegistryDefaults from .queries import DataCoordinateQueryResults, DatasetQueryResults, DimensionRecordQueryResults from .wildcards import CollectionWildcard @@ -71,6 +75,45 @@ class Registry(ABC): implementations. """ + @classmethod + def createFromConfig( + cls, + config: RegistryConfig | str | None = None, + dimensionConfig: DimensionConfig | str | None = None, + butlerRoot: ResourcePathExpression | None = None, + ) -> Registry: + """Create registry database and return `Registry` instance. + + This method initializes database contents, database must be empty + prior to calling this method. + + Parameters + ---------- + config : `RegistryConfig` or `str`, optional + Registry configuration, if missing then default configuration will + be loaded from registry.yaml. + dimensionConfig : `DimensionConfig` or `str`, optional + Dimensions configuration, if missing then default configuration + will be loaded from dimensions.yaml. + butlerRoot : convertible to `lsst.resources.ResourcePath`, optional + Path to the repository root this `Registry` will manage. + + Returns + ------- + registry : `Registry` + A new `Registry` instance. + + Notes + ----- + This method is for backward compatibility only, until all clients + migrate to use new `~lsst.daf.butler.registry.RegistryFactory` factory + class. Regular clients of registry class do not use this method, it is + only used by tests in multiple packages. + """ + from ._registry_factory import RegistryFactory + + return RegistryFactory.create_from_config(config, dimensionConfig, butlerRoot) + @abstractmethod def isWriteable(self) -> bool: """Return `True` if this registry allows write operations, and `False` diff --git a/python/lsst/daf/butler/registry/_registry_factory.py b/python/lsst/daf/butler/registry/_registry_factory.py new file mode 100644 index 0000000000..9b8c712cb9 --- /dev/null +++ b/python/lsst/daf/butler/registry/_registry_factory.py @@ -0,0 +1,185 @@ +# This file is part of daf_butler. +# +# Developed for the LSST Data Management System. +# This product includes software developed by the LSST Project +# (http://www.lsst.org). +# See the COPYRIGHT file at the top-level directory of this distribution +# for details of code ownership. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import annotations + +__all__ = ("RegistryFactory",) + +from typing import TYPE_CHECKING + +from lsst.resources import ResourcePathExpression +from lsst.utils import doImportType + +from ..core import Config, DimensionConfig +from ._butler_registry import ButlerRegistry +from ._config import RegistryConfig +from ._defaults import RegistryDefaults + +if TYPE_CHECKING: + from .._butlerConfig import ButlerConfig + + +class RegistryFactory: + """Interface for creating and initializing Registry instances. + + Each registry implementation can have its own constructor parameters. + The assumption is that an instance of a specific subclass will be + constructed from configuration using ``RegistryClass.fromConfig()`` or + ``RegistryClass.createFromConfig()``. + + This class will look for a ``cls`` entry in registry configuration object + (defaulting to ``SqlRegistry``), import that class, and call one of the + above methods on the imported class. + """ + + @classmethod + def force_registry_config( + cls, config: ButlerConfig | RegistryConfig | Config | str | None + ) -> RegistryConfig: + """Force the supplied config to a `RegistryConfig`. + + Parameters + ---------- + config : `RegistryConfig`, `Config` or `str` or `None` + Registry configuration, if missing then default configuration will + be loaded from registry.yaml. + + Returns + ------- + registry_config : `RegistryConfig` + A registry config. + """ + if not isinstance(config, RegistryConfig): + if isinstance(config, (str, Config)) or config is None: + config = RegistryConfig(config) + else: + raise ValueError(f"Incompatible Registry configuration: {config}") + return config + + @classmethod + def determine_trampoline( + cls, config: ButlerConfig | RegistryConfig | Config | str | None + ) -> tuple[type[ButlerRegistry], RegistryConfig]: + """Return class to use to instantiate real registry. + + Parameters + ---------- + config : `RegistryConfig` or `str`, optional + Registry configuration, if missing then default configuration will + be loaded from registry.yaml. + + Returns + ------- + requested_cls : `type` of `ButlerRegistry` + The real registry class to use. + registry_config : `RegistryConfig` + The `RegistryConfig` to use. + """ + config = cls.force_registry_config(config) + + # Default to the standard registry + registry_cls_name = config.get("cls", "lsst.daf.butler.registries.sql.SqlRegistry") + registry_cls = doImportType(registry_cls_name) + if registry_cls is cls: + raise ValueError("Can not instantiate the abstract base Registry from config") + if not issubclass(registry_cls, ButlerRegistry): + raise TypeError( + f"Registry class obtained from config {registry_cls_name} is not a ButlerRegistry class." + ) + return registry_cls, config + + @classmethod + def create_from_config( + cls, + config: RegistryConfig | str | None = None, + dimensionConfig: DimensionConfig | str | None = None, + butlerRoot: ResourcePathExpression | None = None, + ) -> ButlerRegistry: + """Create registry database and return `ButlerRegistry` instance. + + This method initializes database contents, database must be empty + prior to calling this method. + + Parameters + ---------- + config : `RegistryConfig` or `str`, optional + Registry configuration, if missing then default configuration will + be loaded from registry.yaml. + dimensionConfig : `DimensionConfig` or `str`, optional + Dimensions configuration, if missing then default configuration + will be loaded from dimensions.yaml. + butlerRoot : convertible to `lsst.resources.ResourcePath`, optional + Path to the repository root this `Registry` will manage. + + Returns + ------- + registry : `ButlerRegistry` + A new `ButlerRegistry` instance. + + Notes + ----- + This class will determine the concrete `ButlerRegistry` subclass to + use from configuration. Each subclass should implement this method + even if it can not create a registry. + """ + registry_cls, registry_config = cls.determine_trampoline(config) + return registry_cls.createFromConfig(registry_config, dimensionConfig, butlerRoot) + + @classmethod + def from_config( + cls, + config: ButlerConfig | RegistryConfig | Config | str, + butlerRoot: ResourcePathExpression | None = None, + writeable: bool = True, + defaults: RegistryDefaults | None = None, + ) -> ButlerRegistry: + """Create `ButlerRegistry` subclass instance from ``config``. + + Registry database must be initialized prior to calling this method. + + Parameters + ---------- + config : `ButlerConfig`, `RegistryConfig`, `Config` or `str` + Registry configuration + butlerRoot : convertible to `lsst.resources.ResourcePath`, optional + Path to the repository root this `Registry` will manage. + writeable : `bool`, optional + If `True` (default) create a read-write connection to the database. + defaults : `~lsst.daf.butler.registry.RegistryDefaults`, optional + Default collection search path and/or output `~CollectionType.RUN` + collection. + + Returns + ------- + registry : `ButlerRegistry` (subclass) + A new `ButlerRegistry` subclass instance. + + Notes + ----- + This class will determine the concrete `ButlerRegistry` subclass to + use from configuration. Each subclass should implement this method. + """ + # The base class implementation should trampoline to the correct + # subclass. No implementation should ever use this implementation + # directly. If no class is specified, default to the standard + # registry. + registry_cls, registry_config = cls.determine_trampoline(config) + return registry_cls.fromConfig(config, butlerRoot, writeable, defaults) diff --git a/tests/test_dimensions.py b/tests/test_dimensions.py index 4e17930bcb..95d067130f 100644 --- a/tests/test_dimensions.py +++ b/tests/test_dimensions.py @@ -45,7 +45,7 @@ TimespanDatabaseRepresentation, YamlRepoImportBackend, ) -from lsst.daf.butler.registry import ButlerRegistry, RegistryConfig +from lsst.daf.butler.registry import RegistryConfig, RegistryFactory DIMENSION_DATA_FILE = os.path.normpath( os.path.join(os.path.dirname(__file__), "data", "registry", "hsc-rc2-subset.yaml") @@ -64,7 +64,7 @@ def loadDimensionData() -> DataCoordinateSequence: # data and retreive it as a set of DataCoordinate objects. config = RegistryConfig() config["db"] = "sqlite://" - registry = ButlerRegistry.createFromConfig(config) + registry = RegistryFactory.create_from_config(config) with open(DIMENSION_DATA_FILE) as stream: backend = YamlRepoImportBackend(stream, registry) backend.register() diff --git a/tests/test_obscore.py b/tests/test_obscore.py index 6b1dbddd56..f4f0c9b45a 100644 --- a/tests/test_obscore.py +++ b/tests/test_obscore.py @@ -38,7 +38,7 @@ StorageClassFactory, ) from lsst.daf.butler.registries.sql import SqlRegistry -from lsst.daf.butler.registry import ButlerRegistry, Registry, RegistryConfig +from lsst.daf.butler.registry import ButlerRegistry, Registry, RegistryConfig, RegistryFactory from lsst.daf.butler.registry.obscore import ( DatasetTypeConfig, ObsCoreConfig, @@ -67,7 +67,7 @@ def make_registry( ) -> ButlerRegistry: """Create new empty Registry.""" config = self.make_registry_config(collections, collection_type) - registry = ButlerRegistry.createFromConfig(config, butlerRoot=self.root) + registry = RegistryFactory.create_from_config(config, butlerRoot=self.root) self.initialize_registry(registry) return registry diff --git a/tests/test_postgresql.py b/tests/test_postgresql.py index 5f139db579..3c0bcbbb02 100644 --- a/tests/test_postgresql.py +++ b/tests/test_postgresql.py @@ -40,7 +40,7 @@ import sqlalchemy from lsst.daf.butler import Timespan, ddl -from lsst.daf.butler.registry import ButlerRegistry +from lsst.daf.butler.registry import ButlerRegistry, RegistryFactory from lsst.daf.butler.registry.databases.postgresql import PostgresqlDatabase, _RangeTimespanType from lsst.daf.butler.registry.tests import DatabaseTests, RegistryTests from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir @@ -245,9 +245,9 @@ def makeRegistry(self, share_repo_with: ButlerRegistry | None = None) -> ButlerR config["db"] = self.server.url() config["namespace"] = namespace if share_repo_with is None: - return ButlerRegistry.createFromConfig(config) + return RegistryFactory.create_from_config(config) else: - return ButlerRegistry.fromConfig(config) + return RegistryFactory.from_config(config) class PostgresqlRegistryNameKeyCollMgrUUIDTestCase(PostgresqlRegistryTests, unittest.TestCase): diff --git a/tests/test_quantumBackedButler.py b/tests/test_quantumBackedButler.py index c027990f21..3ae6c1f80a 100644 --- a/tests/test_quantumBackedButler.py +++ b/tests/test_quantumBackedButler.py @@ -37,7 +37,7 @@ RegistryConfig, StorageClass, ) -from lsst.daf.butler.registry import ButlerRegistry +from lsst.daf.butler.registry import RegistryFactory from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir from lsst.resources import ResourcePath @@ -55,7 +55,7 @@ def setUp(self) -> None: # Make a butler and import dimension definitions. registryConfig = RegistryConfig(self.config.get("registry")) - ButlerRegistry.createFromConfig(registryConfig, butlerRoot=self.root) + RegistryFactory.create_from_config(registryConfig, butlerRoot=self.root) self.butler = Butler(self.config, writeable=True, run="RUN") self.butler.import_(filename=os.path.join(TESTDIR, "data", "registry", "base.yaml")) diff --git a/tests/test_query_relations.py b/tests/test_query_relations.py index 120d777b87..960597d482 100644 --- a/tests/test_query_relations.py +++ b/tests/test_query_relations.py @@ -25,7 +25,7 @@ import re import unittest -from lsst.daf.butler.registry import ButlerRegistry, MissingSpatialOverlapError, RegistryConfig, queries +from lsst.daf.butler.registry import MissingSpatialOverlapError, RegistryConfig, RegistryFactory, queries from lsst.daf.butler.transfers import YamlRepoImportBackend TESTDIR = os.path.abspath(os.path.dirname(__file__)) @@ -52,7 +52,7 @@ class TestQueryRelationsTests(unittest.TestCase): def setUpClass(cls) -> None: config = RegistryConfig() config["db"] = "sqlite://" - cls.registry = ButlerRegistry.createFromConfig(config) + cls.registry = RegistryFactory.create_from_config(config) # We need just enough test data to have valid dimension records for # all of the dimensions we're concerned with, and we want to pick # values for each dimension that correspond to a spatiotemporal diff --git a/tests/test_simpleButler.py b/tests/test_simpleButler.py index 8178438da0..588da980f3 100644 --- a/tests/test_simpleButler.py +++ b/tests/test_simpleButler.py @@ -35,7 +35,7 @@ import astropy.time from lsst.daf.butler import Butler, ButlerConfig, CollectionType, DatasetId, DatasetRef, DatasetType, Timespan -from lsst.daf.butler.registry import ButlerRegistry, RegistryConfig, RegistryDefaults +from lsst.daf.butler.registry import RegistryConfig, RegistryDefaults, RegistryFactory from lsst.daf.butler.tests import DatastoreMock from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir @@ -71,7 +71,7 @@ def makeButler(self, **kwargs: Any) -> Butler: # have to make a registry first registryConfig = RegistryConfig(config.get("registry")) - ButlerRegistry.createFromConfig(registryConfig) + RegistryFactory.create_from_config(registryConfig) butler = Butler(config, **kwargs) DatastoreMock.apply(butler) diff --git a/tests/test_sqlite.py b/tests/test_sqlite.py index 48467c09c2..cfa8b48dcd 100644 --- a/tests/test_sqlite.py +++ b/tests/test_sqlite.py @@ -28,7 +28,7 @@ import sqlalchemy from lsst.daf.butler import ddl -from lsst.daf.butler.registry import ButlerRegistry +from lsst.daf.butler.registry import ButlerRegistry, RegistryFactory from lsst.daf.butler.registry.databases.sqlite import SqliteDatabase from lsst.daf.butler.registry.tests import DatabaseTests, RegistryTests from lsst.daf.butler.tests.utils import makeTestTempDir, removeTestTempDir @@ -204,9 +204,9 @@ def makeRegistry(self, share_repo_with: ButlerRegistry | None = None) -> ButlerR config = self.makeRegistryConfig() config["db"] = f"sqlite:///{filename}" if share_repo_with is None: - return ButlerRegistry.createFromConfig(config, butlerRoot=self.root) + return RegistryFactory.create_from_config(config, butlerRoot=self.root) else: - return ButlerRegistry.fromConfig(config, butlerRoot=self.root) + return RegistryFactory.from_config(config, butlerRoot=self.root) class SqliteFileRegistryNameKeyCollMgrUUIDTestCase(SqliteFileRegistryTests, unittest.TestCase): @@ -247,7 +247,7 @@ def makeRegistry(self, share_repo_with: ButlerRegistry | None = None) -> ButlerR return None config = self.makeRegistryConfig() config["db"] = "sqlite://" - return ButlerRegistry.createFromConfig(config) + return RegistryFactory.create_from_config(config) def testMissingAttributes(self): """Test for instantiating a registry against outdated schema which @@ -258,7 +258,7 @@ def testMissingAttributes(self): config = self.makeRegistryConfig() config["db"] = "sqlite://" with self.assertRaises(LookupError): - ButlerRegistry.fromConfig(config) + RegistryFactory.from_config(config) class SqliteMemoryRegistryNameKeyCollMgrUUIDTestCase(unittest.TestCase, SqliteMemoryRegistryTests):