Skip to content

Commit

Permalink
Move DirectButler.__new__ to a factory function
Browse files Browse the repository at this point in the history
Moved most of the logic from DirectButler.__new__ into a factory function DirectButler.create_from_config.
 This allows us to use  __new__ as a common constructor shared by create_from_config() and clone().  This will help ensure that any new instance properties we add are correctly set up for all construction paths.
  • Loading branch information
dhirving committed Dec 26, 2023
1 parent 7667513 commit 02d14b6
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 48 deletions.
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ def from_config(
if butler_class_name is None or butler_class_name == "lsst.daf.butler.direct_butler.DirectButler":
from .direct_butler import DirectButler

return DirectButler(
return DirectButler.create_from_config(
butler_config,
options=options,
without_datastore=without_datastore,
Expand Down
120 changes: 73 additions & 47 deletions python/lsst/daf/butler/direct_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,19 @@ class DirectButler(Butler): # numpydoc ignore=PR02
----------
config : `ButlerConfig`
The configuration for this Butler instance.
options : `ButlerInstanceOptions`
Default values and other settings for the Butler instance.
without_datastore : `bool`, optional
If `True` do not attach a datastore to this butler. Any attempts
to use a datastore will fail.
**kwargs : `str`
Default data ID key-value pairs. These may only identify "governor"
dimensions like ``instrument`` and ``skymap``.
registry : `SqlRegistry`
The object that manages dataset metadata and relationships.
datastore : Datastore
The object that manages actual dataset storage.
storageClasses : StorageClassFactory
An object that maps known storage class names to objects that fully
describe them.
Notes
-----
Most users should call `DirectButler`.``create_from_config`` or the
top-level `Butler`.``from_config`` instead of using this constructor
directly.
"""

# This is __new__ instead of __init__ because we have to support
Expand All @@ -135,61 +140,85 @@ class DirectButler(Butler): # numpydoc ignore=PR02
# a second time with the original arguments to Butler() when the instance
# is returned from Butler.__new__()
def __new__(
cls,
*,
config: ButlerConfig,
registry: SqlRegistry,
datastore: Datastore,
storageClasses: StorageClassFactory,
) -> DirectButler:
self = cast(DirectButler, super().__new__(cls))
self._config = config
self._registry = registry
self._datastore = datastore
self.storageClasses = storageClasses

# For execution butler the datastore needs a special
# dependency-inversion trick. This is not used by regular butler,
# but we do not have a way to distinguish regular butler from execution
# butler.
self._datastore.set_retrieve_dataset_type_method(self._retrieve_dataset_type)

self._registry_shim = RegistryShim(self)

return self

@classmethod
def create_from_config(
cls,
config: ButlerConfig,
*,
options: ButlerInstanceOptions,
without_datastore: bool = False,
) -> DirectButler:
self = cast(DirectButler, super().__new__(cls))
"""Construct a Butler instance from a configuration file.
Parameters
----------
config : `ButlerConfig`
The configuration for this Butler instance.
options : `ButlerInstanceOptions`
Default values and other settings for the Butler instance.
without_datastore : `bool`, optional
If `True` do not attach a datastore to this butler. Any attempts
to use a datastore will fail.
"""
if "run" in config or "collection" in config:
raise ValueError("Passing a run or collection via configuration is no longer supported.")

defaults = RegistryDefaults(
collections=options.collections, run=options.run, infer=options.inferDefaults, **options.kwargs
)
self._config = config
try:
butlerRoot = self._config.get("root", self._config.configDir)
butlerRoot = config.get("root", config.configDir)
writeable = options.writeable
if writeable is None:
writeable = options.run is not None
self._registry = _RegistryFactory(self._config).from_config(
registry = _RegistryFactory(config).from_config(
butlerRoot=butlerRoot, writeable=writeable, defaults=defaults
)
if without_datastore:
self._datastore = NullDatastore(None, None)
datastore: Datastore = NullDatastore(None, None)
else:
self._datastore = Datastore.fromConfig(
self._config, self._registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot
datastore = Datastore.fromConfig(
config, registry.getDatastoreBridgeManager(), butlerRoot=butlerRoot
)
# TODO: Once datastore drops dependency on registry we can
# construct datastore first and pass opaque tables to registry
# constructor.
self._registry.make_datastore_tables(self._datastore.get_opaque_table_definitions())
self.storageClasses = StorageClassFactory()
self.storageClasses.addFromConfig(self._config)
except Exception:
registry.make_datastore_tables(datastore.get_opaque_table_definitions())
storageClasses = StorageClassFactory()
storageClasses.addFromConfig(config)

return DirectButler(
config=config, registry=registry, datastore=datastore, storageClasses=storageClasses
)
except:

Check failure on line 216 in python/lsst/daf/butler/direct_butler.py

View workflow job for this annotation

GitHub Actions / call-workflow / lint

E722

do not use bare 'except'
# Failures here usually mean that configuration is incomplete,
# just issue an error message which includes config file URI.
_LOG.error(f"Failed to instantiate Butler from config {self._config.configFile}.")
_LOG.error(f"Failed to instantiate Butler from config {config.configFile}.")
raise

if "run" in self._config or "collection" in self._config:
raise ValueError("Passing a run or collection via configuration is no longer supported.")

self._finish_init()

return self

def _finish_init(self) -> None:
"""Finish the setup of a new DirectButler instance."""
# For execution butler the datastore needs a special
# dependency-inversion trick. This is not used by regular butler,
# but we do not have a way to distinguish regular butler from execution
# butler.
self._datastore.set_retrieve_dataset_type_method(self._retrieve_dataset_type)

self._registry_shim = RegistryShim(self)

def _clone(
self,
*,
Expand All @@ -199,17 +228,14 @@ def _clone(
**kwargs: Any,
) -> DirectButler:
# Docstring inherited
new_butler = cast(DirectButler, super().__new__(type(self)))
defaults = RegistryDefaults(collections=collections, run=run, infer=inferDefaults, **kwargs)

new_butler._registry = self._registry.copy(defaults)
new_butler._datastore = self._datastore
new_butler.storageClasses = self.storageClasses
new_butler._config = self._config

new_butler._finish_init()

return new_butler
return DirectButler(
registry=self._registry.copy(defaults),
config=self._config,
datastore=self._datastore,
storageClasses=self.storageClasses,
)

GENERATION: ClassVar[int] = 3
"""This is a Generation 3 Butler.
Expand Down Expand Up @@ -261,7 +287,7 @@ def _unpickle(
butler : `Butler`
A new `Butler` instance.
"""
return cls(
return cls.create_from_config(
config=config,
options=ButlerInstanceOptions(
collections=collections, run=run, writeable=writeable, kwargs=defaultDataId
Expand Down

0 comments on commit 02d14b6

Please sign in to comment.