Skip to content

Commit

Permalink
Merge pull request #685 from lsst/tickets/DM-34589
Browse files Browse the repository at this point in the history
DM-34589: Add a way to mark which dimensions populate others.
  • Loading branch information
TallJimbo committed Aug 1, 2023
2 parents 4927b47 + 0bccc2b commit 490508e
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 1 deletion.
3 changes: 3 additions & 0 deletions doc/changes/DM-34589.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add dimensions config entries that declare that the `visit` dimension "populates" various dimension elements that define many-to-many relationships.

In the future, this will be used to ensure the correct records are included in exports of dimension records.
5 changes: 4 additions & 1 deletion python/lsst/daf/butler/configs/dimensions.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: 3
version: 4
namespace: daf_butler
skypix:
# 'common' is the skypix system and level used to relate all other spatial
Expand Down Expand Up @@ -427,6 +427,7 @@ elements:
A many-to-many join table that provides region information for
visit-detector combinations.
requires: [visit, detector]
populated_by: visit
storage:
cls: lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage

Expand Down Expand Up @@ -459,6 +460,7 @@ elements:
A many-to-many join table that relates exposures to the visits they
belong to.
requires: [exposure, visit]
populated_by: visit
always_join: true
storage:
cls: lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage
Expand All @@ -468,6 +470,7 @@ elements:
A many-to-many join table that relates visits to the visit_systems they
belong to.
requires: [visit, visit_system]
populated_by: visit
always_join: true
storage:
cls: lsst.daf.butler.registry.dimensions.table.TableDimensionRecordStorage
Expand Down
1 change: 1 addition & 0 deletions python/lsst/daf/butler/core/dimensions/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ def _extractElementVisitors(self) -> Iterator[DimensionConstructionVisitor]:
metadata=metadata,
alwaysJoin=subconfig.get("always_join", False),
uniqueKeys=uniqueKeys,
populated_by=subconfig.get("populated_by", None),
)

def _extractTopologyVisitors(self) -> Iterator[DimensionConstructionVisitor]:
Expand Down
15 changes: 15 additions & 0 deletions python/lsst/daf/butler/core/dimensions/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,12 @@ def __init__(
implied: NamedValueAbstractSet[Dimension],
metadata: NamedValueAbstractSet[ddl.FieldSpec],
alwaysJoin: bool,
populated_by: Dimension | None,
):
super().__init__(name, storage=storage, implied=implied, metadata=metadata)
self._required = required
self._alwaysJoin = alwaysJoin
self._populated_by = populated_by

@property
def required(self) -> NamedValueAbstractSet[Dimension]:
Expand All @@ -403,6 +405,11 @@ def alwaysJoin(self) -> bool:
# Docstring inherited from DimensionElement.
return self._alwaysJoin

@property
def populated_by(self) -> Dimension | None:
# Docstring inherited.
# Plain accessor: hands back whatever is stored in ``_populated_by`` on
# this instance, which per the return annotation may be `None`.
return self._populated_by


class DatabaseDimensionElementConstructionVisitor(DimensionConstructionVisitor):
"""Construction visitor for database dimension and dimension combination.
Expand Down Expand Up @@ -439,6 +446,9 @@ class DatabaseDimensionElementConstructionVisitor(DimensionConstructionVisitor):
relationship between those dimensions that must always be satisfied.
Should only be provided when a `DimensionCombination` is being
constructed.
populated_by : `str`, optional
The name of the dimension that this element's records are always inserted,
exported, and imported alongside.
"""

def __init__(
Expand All @@ -450,6 +460,7 @@ def __init__(
metadata: Iterable[ddl.FieldSpec] = (),
uniqueKeys: Iterable[ddl.FieldSpec] = (),
alwaysJoin: bool = False,
populated_by: str | None = None,
):
super().__init__(name)
self._storage = storage
Expand All @@ -458,6 +469,7 @@ def __init__(
self._metadata = NamedValueSet(metadata).freeze()
self._uniqueKeys = NamedValueSet(uniqueKeys).freeze()
self._alwaysJoin = alwaysJoin
self._populated_by = populated_by

def hasDependenciesIn(self, others: Set[str]) -> bool:
# Docstring inherited from DimensionConstructionVisitor.
Expand Down Expand Up @@ -501,5 +513,8 @@ def visit(self, builder: DimensionConstructionBuilder) -> None:
implied=implied.freeze(),
metadata=self._metadata,
alwaysJoin=self._alwaysJoin,
populated_by=(
builder.dimensions[self._populated_by] if self._populated_by is not None else None
),
)
builder.elements.add(combination)
21 changes: 21 additions & 0 deletions python/lsst/daf/butler/core/dimensions/_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,22 @@ def alwaysJoin(self) -> bool:
"""
return False

@property
@abstractmethod
def populated_by(self) -> Dimension | None:
    """Dimension that this element's records always accompany.

    This element's records are always inserted, exported, and imported
    alongside that dimension.

    Notes
    -----
    A value of `None` is expected, at least at first, for any data
    repository that was created before this attribute was added.  In that
    case the records for this element will often have to be exported
    manually whenever datasets associated with some other related
    dimension are exported; otherwise the post-import data repository may
    not function as expected.
    """
    raise NotImplementedError


class Dimension(DimensionElement):
"""A dimension.
Expand Down Expand Up @@ -376,6 +392,11 @@ def alternateKeys(self) -> NamedValueAbstractSet[ddl.FieldSpec]:
_, *alternateKeys = self.uniqueKeys
return NamedValueSet(alternateKeys).freeze()

@property
def populated_by(self) -> Dimension:
# Docstring inherited.
# A dimension reports itself here, so the return annotation is narrowed to
# ``Dimension`` and this override never yields `None`.
return self


class DimensionCombination(DimensionElement):
"""Element with extra information.
Expand Down
14 changes: 14 additions & 0 deletions python/lsst/daf/butler/core/dimensions/_universe.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import logging
import math
import pickle
from collections import defaultdict
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any, ClassVar, TypeVar

Expand Down Expand Up @@ -170,6 +171,11 @@ def __new__(
# and implied. This is used for encode/decode.
self._dimensionIndices = {name: i for i, name in enumerate(self._dimensions.names)}

self._populates = defaultdict(NamedValueSet)
for element in self._elements:
if element.populated_by is not None:
self._populates[element.populated_by.name].add(element)

return self

@property
Expand Down Expand Up @@ -483,6 +489,12 @@ def getEncodeLength(self) -> int:
"""
return math.ceil(len(self._dimensions) / 8)

def get_elements_populated_by(self, dimension: Dimension) -> NamedValueAbstractSet[DimensionElement]:
    """Return the elements whose records are populated by a dimension.

    Parameters
    ----------
    dimension : `Dimension`
        Dimension to look up; only its ``name`` is used as the key.

    Returns
    -------
    elements : `NamedValueAbstractSet` [ `DimensionElement` ]
        The `DimensionElement` objects whose
        `~DimensionElement.populated_by` attribute is the given dimension.
        Empty if nothing is registered under that dimension's name.
    """
    # ``_populates`` is a `defaultdict`, so indexing it with a name it does
    # not already hold would silently insert a new empty entry.  Use ``get``
    # so that a read-only query never mutates this object.
    populated = self._populates.get(dimension.name)
    if populated is None:
        return NamedValueSet().freeze()
    return populated

@classmethod
def _unpickle(cls, version: int, namespace: str | None = None) -> DimensionUniverse:
"""Return an unpickled dimension universe.
Expand Down Expand Up @@ -542,6 +554,8 @@ def __deepcopy__(self, memo: dict) -> DimensionUniverse:

_packers: dict[str, DimensionPackerFactory]

_populates: defaultdict[str, NamedValueSet[DimensionElement]]

_version: int

_namespace: str
14 changes: 14 additions & 0 deletions tests/test_dimensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,20 @@ def testObservationDimensions(self):
self.assertEqual(graph.temporal.names, {"observation_timespans"})
self.assertEqual(next(iter(graph.spatial)).governor, self.universe["instrument"])
self.assertEqual(next(iter(graph.temporal)).governor, self.universe["instrument"])
self.assertEqual(self.universe["visit_definition"].populated_by, self.universe["visit"])
self.assertEqual(self.universe["visit_system_membership"].populated_by, self.universe["visit"])
self.assertEqual(self.universe["visit_detector_region"].populated_by, self.universe["visit"])
self.assertEqual(
self.universe.get_elements_populated_by(self.universe["visit"]),
NamedValueSet(
{
self.universe["visit"],
self.universe["visit_definition"],
self.universe["visit_system_membership"],
self.universe["visit_detector_region"],
}
),
)

def testSkyMapDimensions(self):
graph = DimensionGraph(self.universe, names=("patch",))
Expand Down

0 comments on commit 490508e

Please sign in to comment.