From e08bba5d8b8d2d03c5a1299f6ba575f2806c5990 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Tue, 17 May 2022 12:03:29 -0400 Subject: [PATCH 1/3] Add a way to mark which dimensions populate others. In the future, this will be used to make it easier to export dimension data without knowing the details of the dimension combinations that represent many-to-many joins (DM-34838). There's no real gain to updating the dimensions configuration before that happens, but merging these changes early is useful because it will make software versions with just this change much better able to handle future data repositories that use them in their dimensions configuration, even if the associated functionality isn't available as a result. --- .../daf/butler/core/dimensions/_config.py | 1 + .../daf/butler/core/dimensions/_database.py | 15 +++++++++++++ .../daf/butler/core/dimensions/_elements.py | 21 +++++++++++++++++++ .../daf/butler/core/dimensions/_universe.py | 14 +++++++++++++ 4 files changed, 51 insertions(+) diff --git a/python/lsst/daf/butler/core/dimensions/_config.py b/python/lsst/daf/butler/core/dimensions/_config.py index 498061019a..4a43483b5c 100644 --- a/python/lsst/daf/butler/core/dimensions/_config.py +++ b/python/lsst/daf/butler/core/dimensions/_config.py @@ -214,6 +214,7 @@ def _extractElementVisitors(self) -> Iterator[DimensionConstructionVisitor]: metadata=metadata, alwaysJoin=subconfig.get("always_join", False), uniqueKeys=uniqueKeys, + populated_by=subconfig.get("populated_by", None), ) def _extractTopologyVisitors(self) -> Iterator[DimensionConstructionVisitor]: diff --git a/python/lsst/daf/butler/core/dimensions/_database.py b/python/lsst/daf/butler/core/dimensions/_database.py index e8266670ac..1829ed95c4 100644 --- a/python/lsst/daf/butler/core/dimensions/_database.py +++ b/python/lsst/daf/butler/core/dimensions/_database.py @@ -388,10 +388,12 @@ def __init__( implied: NamedValueAbstractSet[Dimension], metadata: NamedValueAbstractSet[ddl.FieldSpec], 
alwaysJoin: bool, + populated_by: Dimension | None, ): super().__init__(name, storage=storage, implied=implied, metadata=metadata) self._required = required self._alwaysJoin = alwaysJoin + self._populated_by = populated_by @property def required(self) -> NamedValueAbstractSet[Dimension]: @@ -403,6 +405,11 @@ def alwaysJoin(self) -> bool: # Docstring inherited from DimensionElement. return self._alwaysJoin + @property + def populated_by(self) -> Dimension | None: + # Docstring inherited. + return self._populated_by + class DatabaseDimensionElementConstructionVisitor(DimensionConstructionVisitor): """Construction visitor for database dimension and dimension combination. @@ -439,6 +446,9 @@ class DatabaseDimensionElementConstructionVisitor(DimensionConstructionVisitor): relationship between those dimensions that must always be satisfied. Should only be provided when a `DimensionCombination` is being constructed. + populated_by: `str`, optional + The name of the dimension that this element's records are always + inserted, exported, and imported alongside. """ def __init__( @@ -450,6 +460,7 @@ def __init__( metadata: Iterable[ddl.FieldSpec] = (), uniqueKeys: Iterable[ddl.FieldSpec] = (), alwaysJoin: bool = False, + populated_by: str | None = None, ): super().__init__(name) self._storage = storage @@ -458,6 +469,7 @@ def __init__( self._metadata = NamedValueSet(metadata).freeze() self._uniqueKeys = NamedValueSet(uniqueKeys).freeze() self._alwaysJoin = alwaysJoin + self._populated_by = populated_by def hasDependenciesIn(self, others: Set[str]) -> bool: # Docstring inherited from DimensionConstructionVisitor. 
@@ -501,5 +513,8 @@ def visit(self, builder: DimensionConstructionBuilder) -> None: implied=implied.freeze(), metadata=self._metadata, alwaysJoin=self._alwaysJoin, + populated_by=( + builder.dimensions[self._populated_by] if self._populated_by is not None else None + ), ) builder.elements.add(combination) diff --git a/python/lsst/daf/butler/core/dimensions/_elements.py b/python/lsst/daf/butler/core/dimensions/_elements.py index 01366a53ff..84006cb82d 100644 --- a/python/lsst/daf/butler/core/dimensions/_elements.py +++ b/python/lsst/daf/butler/core/dimensions/_elements.py @@ -330,6 +330,22 @@ def alwaysJoin(self) -> bool: """ return False + @property + @abstractmethod + def populated_by(self) -> Dimension | None: + """The dimension that this element's records are always inserted, + exported, and imported alongside. + + Notes + ----- + When this is `None` (as it will be, at least at first, for any data + repositories created before this attribute was added), records for + this element will often need to be exported manually when datasets + associated with some other related dimension are exported, in order for + the post-import data repository to function as expected. + """ + raise NotImplementedError() + class Dimension(DimensionElement): """A dimension. @@ -376,6 +392,11 @@ def alternateKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]: _, *alternateKeys = self.uniqueKeys return NamedValueSet(alternateKeys).freeze() + @property + def populated_by(self) -> Dimension: + # Docstring inherited. + return self + class DimensionCombination(DimensionElement): """Element with extra information. 
diff --git a/python/lsst/daf/butler/core/dimensions/_universe.py b/python/lsst/daf/butler/core/dimensions/_universe.py index 0c35a1d4f6..d903f0262e 100644 --- a/python/lsst/daf/butler/core/dimensions/_universe.py +++ b/python/lsst/daf/butler/core/dimensions/_universe.py @@ -26,6 +26,7 @@ import logging import math import pickle +from collections import defaultdict from collections.abc import Iterable, Mapping from typing import TYPE_CHECKING, Any, ClassVar, TypeVar @@ -170,6 +171,11 @@ def __new__( # and implied. This is used for encode/decode. self._dimensionIndices = {name: i for i, name in enumerate(self._dimensions.names)} + self._populates = defaultdict(NamedValueSet) + for element in self._elements: + if element.populated_by is not None: + self._populates[element.populated_by.name].add(element) + return self @property @@ -483,6 +489,12 @@ def getEncodeLength(self) -> int: """ return math.ceil(len(self._dimensions) / 8) + def get_elements_populated_by(self, dimension: Dimension) -> NamedValueAbstractSet[DimensionElement]: + """Return the set of `DimensionElement` objects whose + `~DimensionElement.populated_by` attribute is the given dimension. + """ + return self._populates[dimension.name] + @classmethod def _unpickle(cls, version: int, namespace: str | None = None) -> DimensionUniverse: """Return an unpickled dimension universe. @@ -542,6 +554,8 @@ def __deepcopy__(self, memo: dict) -> DimensionUniverse: _packers: dict[str, DimensionPackerFactory] + _populates: defaultdict[str, NamedValueSet[DimensionElement]] + _version: int _namespace: str From 0f8132467e28cc06d68e79529270071787db4b5c Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Fri, 28 Jul 2023 13:18:04 -0400 Subject: [PATCH 2/3] Mark visit join-table elements as populated by visit. 
--- python/lsst/daf/butler/configs/dimensions.yaml | 5 ++++- tests/test_dimensions.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/python/lsst/daf/butler/configs/dimensions.yaml b/python/lsst/daf/butler/configs/dimensions.yaml index 8e0d4e00e1..a61ef88c4a 100644 --- a/python/lsst/daf/butler/configs/dimensions.yaml +++ b/python/lsst/daf/butler/configs/dimensions.yaml @@ -1,4 +1,4 @@ -version: 3 +version: 4 namespace: daf_butler skypix: # 'common' is the skypix system and level used to relate all other spatial @@ -427,6 +427,7 @@ elements: A many-to-many join table that provides region information for visit-detector combinations. requires: [visit, detector] + populated_by: visit storage: cls: lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage @@ -459,6 +460,7 @@ elements: A many-to-many join table that relates exposures to the visits they belong to. requires: [exposure, visit] + populated_by: visit always_join: true storage: cls: lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage @@ -468,6 +470,7 @@ elements: A many-to-many join table that relates visits to the visit_systems they belong to. 
requires: [visit, visit_system] + populated_by: visit always_join: true storage: cls: lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage diff --git a/tests/test_dimensions.py b/tests/test_dimensions.py index f510c61d47..845489abdb 100644 --- a/tests/test_dimensions.py +++ b/tests/test_dimensions.py @@ -285,6 +285,20 @@ def testObservationDimensions(self): self.assertEqual(graph.temporal.names, {"observation_timespans"}) self.assertEqual(next(iter(graph.spatial)).governor, self.universe["instrument"]) self.assertEqual(next(iter(graph.temporal)).governor, self.universe["instrument"]) + self.assertEqual(self.universe["visit_definition"].populated_by, self.universe["visit"]) + self.assertEqual(self.universe["visit_system_membership"].populated_by, self.universe["visit"]) + self.assertEqual(self.universe["visit_detector_region"].populated_by, self.universe["visit"]) + self.assertEqual( + self.universe.get_elements_populated_by(self.universe["visit"]), + NamedValueSet( + { + self.universe["visit"], + self.universe["visit_definition"], + self.universe["visit_system_membership"], + self.universe["visit_detector_region"], + } + ), + ) def testSkyMapDimensions(self): graph = DimensionGraph(self.universe, names=("patch",)) From 0bccc2b534c9fa59190d45868f34678265d45ccc Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Fri, 28 Jul 2023 13:25:21 -0400 Subject: [PATCH 3/3] Add changelog entry. --- doc/changes/DM-34589.misc.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/changes/DM-34589.misc.md diff --git a/doc/changes/DM-34589.misc.md b/doc/changes/DM-34589.misc.md new file mode 100644 index 0000000000..9f8159ceb9 --- /dev/null +++ b/doc/changes/DM-34589.misc.md @@ -0,0 +1,3 @@ +Add dimensions config entries that declare that the `visit` dimension "populates" various dimension elements that define many-to-many relationships. + +In the future, this will be used to ensure the correct records are included in exports of dimension records.