Merge pull request #1036 from lsst/tickets/DM-41326
DM-41326: remove DimensionGraph and related interfaces
TallJimbo committed Jul 25, 2024
2 parents fd5547a + 73ae713 commit cdbbd14
Showing 45 changed files with 343 additions and 1,570 deletions.
3 changes: 3 additions & 0 deletions doc/changes/DM-41326.removal.md
@@ -0,0 +1,3 @@
+Remove `DimensionGraph` and the `Mapping` interface to `DataCoordinate`, along with most other public interfaces that utilize `DimensionElement` instances instead of just their string names.
+
+See [RFC-834](https://rubinobs.atlassian.net/browse/RFC-834) for full details and rationale.
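For downstream code, the practical upshot is that dimension containers are now plain set-like collections of string names. A minimal migration sketch, assuming an existing repository at `repo` (the path and dimension names are placeholders, not part of this commit):

```python
from lsst.daf.butler import Butler

butler = Butler("repo")  # hypothetical repository path
universe = butler.dimensions

# Old (removed): DimensionGraph held Dimension objects, so callers
# typically reached for .names to get strings out.
# New: conform() returns a DimensionGroup that behaves as a set of names.
group = universe.conform(["instrument", "detector"])
print(group.required)       # string names, e.g. {'instrument', 'detector'}
print("detector" in group)  # membership tests use plain strings
```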
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/_dataset_ref.py
@@ -50,7 +50,7 @@
 from ._dataset_type import DatasetType, SerializedDatasetType
 from ._named import NamedKeyDict
 from .datastore.stored_file_info import StoredDatastoreItemInfo
-from .dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
+from .dimensions import DataCoordinate, DimensionGroup, DimensionUniverse, SerializedDataCoordinate
 from .json import from_json_pydantic, to_json_pydantic
 from .persistence_context import PersistenceContextVars

@@ -363,7 +363,7 @@ def __hash__(self) -> int:
         return hash((self.datasetType, self.dataId, self.id))
 
     @property
-    def dimensions(self) -> DimensionGraph:
+    def dimensions(self) -> DimensionGroup:
         """Dimensions associated with the underlying `DatasetType`."""
         return self.datasetType.dimensions

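The change to `DatasetRef` is annotation-only: `ref.dimensions` now delegates to the dataset type's `DimensionGroup`. A hedged sketch of what that looks like at the call site, reusing `butler` from the sketch above (the `raw` dataset type and data ID values are made up):

```python
# Assumes a repo with a registered "raw" dataset type and default collections.
ref = butler.find_dataset("raw", instrument="HSC", detector=50, exposure=903334)
if ref is not None:
    print(ref.dimensions.names)          # e.g. {'instrument', 'detector', 'exposure'}
    print("exposure" in ref.dimensions)  # membership is by string name
```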
52 changes: 17 additions & 35 deletions python/lsst/daf/butler/_dataset_type.py
@@ -39,12 +39,12 @@
 
 from ._config_support import LookupKey
 from ._storage_class import StorageClass, StorageClassFactory
-from .dimensions import DimensionGraph, DimensionGroup, SerializedDimensionGraph
+from .dimensions import DimensionGroup
 from .json import from_json_pydantic, to_json_pydantic
 from .persistence_context import PersistenceContextVars
 
 if TYPE_CHECKING:
-    from .dimensions import Dimension, DimensionUniverse
+    from .dimensions import DimensionUniverse
     from .registry import Registry

@@ -59,7 +59,7 @@ class SerializedDatasetType(BaseModel):
 
     name: StrictStr
     storageClass: StrictStr | None = None
-    dimensions: SerializedDimensionGraph | list[StrictStr] | None = None
+    dimensions: list[StrictStr] | None = None
     parentStorageClass: StrictStr | None = None
     isCalibration: StrictBool = False

@@ -69,7 +69,7 @@ def direct(
         *,
         name: str,
         storageClass: str | None = None,
-        dimensions: list | dict | None = None,
+        dimensions: list | None = None,
         parentStorageClass: str | None = None,
         isCalibration: bool = False,
     ) -> SerializedDatasetType:
@@ -88,7 +88,7 @@
             The name of the dataset type.
         storageClass : `str` or `None`
             The name of the storage class.
-        dimensions : `list` or `dict` or `None`
+        dimensions : `list` or `None`
             The dimensions associated with this dataset type.
         parentStorageClass : `str` or `None`
             The parent storage class name if this is a component.
@@ -104,16 +104,7 @@
         key = (name, storageClass or "")
         if cache is not None and (type_ := cache.get(key, None)) is not None:
             return type_
-
-        serialized_dimensions: list[str] | None
-        match dimensions:
-            case list():
-                serialized_dimensions = dimensions
-            case dict():
-                serialized_dimensions = SerializedDimensionGraph.direct(**dimensions).names
-            case None:
-                serialized_dimensions = None
-
+        serialized_dimensions = dimensions if dimensions is not None else None
         node = cls.model_construct(
             name=name,
             storageClass=storageClass,
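After this simplification the serialized `dimensions` field is a plain list of names; the old `SerializedDimensionGraph` dict form is no longer accepted. A small sketch of the fast-path constructor under the new scheme (the dataset type and storage class names are illustrative assumptions):

```python
from lsst.daf.butler import SerializedDatasetType

serialized = SerializedDatasetType.direct(
    name="calexp",                                   # hypothetical dataset type
    storageClass="ExposureF",                        # assumed storage class name
    dimensions=["instrument", "visit", "detector"],  # must be a list now, not a dict
)
print(serialized.dimensions)  # ['instrument', 'visit', 'detector']
```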
@@ -148,11 +139,9 @@ class DatasetType:
         and underscores. Component dataset types should contain a single
         period separating the base dataset type name from the component name
         (and may be recursive).
-    dimensions : `DimensionGroup`, `DimensionGraph`, or \
-        `~collections.abc.Iterable` [ `Dimension` or `str` ]
+    dimensions : `DimensionGroup` or `~collections.abc.Iterable` [ `str` ]
         Dimensions used to label and relate instances of this `DatasetType`.
-        If not a `DimensionGraph` or `DimensionGroup`, ``universe`` must be
-        provided as well.
+        If not a `DimensionGroup`, ``universe`` must be provided as well.
     storageClass : `StorageClass` or `str`
         Instance of a `StorageClass` or name of `StorageClass` that defines
         how this `DatasetType` is persisted.
@@ -162,7 +151,7 @@
         is not a component.
     universe : `DimensionUniverse`, optional
         Set of all known dimensions, used to normalize ``dimensions`` if it
-        is not already a `DimensionGraph`.
+        is not already a `DimensionGroup`.
     isCalibration : `bool`, optional
         If `True`, this dataset type may be included in
         `~CollectionType.CALIBRATION` collections.
@@ -209,7 +198,7 @@ def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
     def __init__(
         self,
         name: str,
-        dimensions: DimensionGroup | DimensionGraph | Iterable[Dimension | str],
+        dimensions: DimensionGroup | Iterable[str],
         storageClass: StorageClass | str,
         parentStorageClass: StorageClass | str | None = None,
         *,
@@ -221,9 +210,7 @@ def __init__(
         self._name = name
         universe = universe or getattr(dimensions, "universe", None)
         if universe is None:
-            raise ValueError(
-                "If dimensions is not a DimensionGroup or DimensionGraph, a universe must be provided."
-            )
+            raise ValueError("If dimensions is not a DimensionGroup, a universe must be provided.")
         self._dimensions = universe.conform(dimensions)
         if name in self._dimensions.universe.governor_dimensions:
             raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
@@ -373,12 +360,12 @@ def name(self) -> str:
         return self._name
 
     @property
-    def dimensions(self) -> DimensionGraph:
-        """Return the dimensions of this dataset type (`DimensionGraph`).
+    def dimensions(self) -> DimensionGroup:
+        """Return the dimensions of this dataset type (`DimensionGroup`).
 
         The dimensions of a dataset type define the keys of its datasets' data IDs.
         """
-        return self._dimensions._as_graph()
+        return self._dimensions
 
     @property
     def storageClass(self) -> StorageClass:
@@ -744,14 +731,9 @@ def from_simple(
         if universe is None:
             # this is for mypy
             raise ValueError("Unable to determine a usable universe")
-
-        match simple.dimensions:
-            case list():
-                dimensions = universe.conform(simple.dimensions)
-            case SerializedDimensionGraph():
-                dimensions = universe.conform(simple.dimensions.names)
-            case None:
-                raise ValueError(f"Dimensions must be specified in {simple}")
+        if simple.dimensions is None:
+            raise ValueError(f"Dimensions must be specified in {simple}")
+        dimensions = universe.conform(simple.dimensions)
 
         newType = cls(
             name=simple.name,
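Taken together, these hunks mean a `DatasetType` is now built from either a `DimensionGroup` or an iterable of string names plus an explicit `universe`; `Dimension` objects are no longer valid input. A sketch under those rules (the repo path and `ExposureF` storage class are assumptions):

```python
from lsst.daf.butler import Butler, DatasetType

butler = Butler("repo")  # hypothetical repository
universe = butler.dimensions

# String names require a universe to normalize against...
flat = DatasetType(
    "flat",
    dimensions=["instrument", "detector", "physical_filter"],
    storageClass="ExposureF",  # assumed storage class
    universe=universe,
    isCalibration=True,
)

# ...while a DimensionGroup already carries its universe with it.
group = universe.conform(["instrument", "detector", "physical_filter"])
flat2 = DatasetType("flat", dimensions=group, storageClass="ExposureF", isCalibration=True)
assert flat == flat2
```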
15 changes: 5 additions & 10 deletions python/lsst/daf/butler/_registry_shim.py
@@ -36,14 +36,11 @@
 from ._dataset_association import DatasetAssociation
 from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
 from ._dataset_type import DatasetType
-from ._named import NameLookupMapping
 from ._timespan import Timespan
 from .dimensions import (
     DataCoordinate,
     DataId,
-    Dimension,
-    DimensionElement,
-    DimensionGraph,
     DimensionGroup,
     DimensionRecord,
     DimensionUniverse,
@@ -248,15 +245,14 @@ def expandDataId(
         self,
         dataId: DataId | None = None,
         *,
-        dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None,
-        graph: DimensionGraph | None = None,
-        records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
+        dimensions: Iterable[str] | DimensionGroup | None = None,
+        records: Mapping[str, DimensionRecord | None] | None = None,
         withDefaults: bool = True,
         **kwargs: Any,
     ) -> DataCoordinate:
         # Docstring inherited from a base class.
         return self._registry.expandDataId(
-            dataId, dimensions=dimensions, graph=graph, records=records, withDefaults=withDefaults, **kwargs
+            dataId, dimensions=dimensions, records=records, withDefaults=withDefaults, **kwargs
         )
 
     def insertDimensionData(
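With the `graph` argument gone and `records` keyed by element name rather than `DimensionElement`, expanding a data ID through the shim looks like this sketch (`butler` as above; the data ID values are made up):

```python
registry = butler.registry
data_id = registry.expandDataId(instrument="HSC", detector=50)  # hypothetical values
# Attached dimension records are now looked up by string element name.
print(data_id.records["detector"])
```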
@@ -310,7 +306,7 @@ def queryDatasets(
         datasetType: Any,
         *,
         collections: CollectionArgType | None = None,
-        dimensions: Iterable[Dimension | str] | None = None,
+        dimensions: Iterable[str] | None = None,
         dataId: DataId | None = None,
         where: str = "",
         findFirst: bool = False,
@@ -335,8 +331,7 @@
 
     def queryDataIds(
         self,
-        # TODO: Drop Dimension support on DM-41326.
-        dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str,
+        dimensions: DimensionGroup | Iterable[str] | str,
         *,
         dataId: DataId | None = None,
         datasets: Any = None,
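The query shims follow suit: `queryDataIds` and `queryDatasets` take dimension names as plain strings or a `DimensionGroup`. A hedged usage sketch, with `registry` as in the previous sketch (collection and constraint values are placeholders):

```python
# Data IDs for all visit/detector combinations matching a constraint.
data_ids = registry.queryDataIds(["visit", "detector"], instrument="HSC")

# Dataset queries constrained by a where expression.
refs = registry.queryDatasets(
    "calexp",
    collections=["HSC/runs/example"],  # hypothetical collection
    where="detector = 50 AND instrument = 'HSC'",
)
```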
12 changes: 5 additions & 7 deletions python/lsst/daf/butler/datastore/file_templates.py
@@ -43,7 +43,7 @@
 from .._dataset_ref import DatasetId, DatasetRef
 from .._exceptions import ValidationError
 from .._storage_class import StorageClass
-from ..dimensions import DataCoordinate, DimensionGraph, DimensionGroup
+from ..dimensions import DataCoordinate, DimensionGroup
 
 if TYPE_CHECKING:
     from .._dataset_type import DatasetType
@@ -394,9 +394,7 @@ def __str__(self) -> str:
     def __repr__(self) -> str:
         return f'{self.__class__.__name__}("{self.template}")'
 
-    def grouped_fields(
-        self, dimensions: DimensionGroup | DimensionGraph | None = None
-    ) -> tuple[FieldDict, FieldDict]:
+    def grouped_fields(self, dimensions: DimensionGroup | None = None) -> tuple[FieldDict, FieldDict]:
         """Return all the fields, grouped by their type.
 
         Parameters
@@ -723,7 +721,7 @@ def validateTemplate(self, entity: DatasetRef | DatasetType | StorageClass | Non
         -----
         Validation will always include a check that mandatory fields
         are present and that at least one field refers to a dimension.
-        If the supplied entity includes a `DimensionGraph` then it will be
+        If the supplied entity includes a `DimensionGroup` then it will be
         used to compare the available dimensions with those specified in the
         template.
         """
@@ -821,7 +819,7 @@ def validateTemplate(self, entity: DatasetRef | DatasetType | StorageClass | Non
         # Fall back to dataId keys if we have them but no links.
         # dataId keys must still be present in the template
         try:
-            minimal = set(entity.dimensions.required.names)
+            minimal = set(entity.dimensions.required)
             maximal = set(entity.dimensions.names)
         except AttributeError:
             try:
@@ -890,5 +888,5 @@ def _determine_skypix_alias(self, entity: DatasetRef | DatasetType) -> str | Non
         # update to be more like the real world while still providing our
         # only tests of important behavior.
         if len(entity.dimensions.skypix) == 1:
-            (alias,) = entity.dimensions.skypix.names
+            (alias,) = entity.dimensions.skypix
         return alias
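Both template fixes are the same pattern: `DimensionGroup.required` and `.skypix` already iterate as string names, so the intermediate `.names` hop disappears. A tiny sketch, with `universe` as in the earlier sketches (assuming it defines an `htm7` skypix dimension):

```python
group = universe.conform(["htm7"])  # hypothetical skypix dimension name
(alias,) = group.skypix             # unpacks a str directly, no .names needed
minimal = set(group.required)       # required dimension names as plain strings
print(alias, minimal)
```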
1 change: 0 additions & 1 deletion python/lsst/daf/butler/dimensions/__init__.py
@@ -40,7 +40,6 @@
 from ._database import *
 from ._elements import *
 from ._governor import *
-from ._graph import *
 from ._group import *
 from ._packer import *
 from ._record_set import *
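Because `._graph` is dropped from the package's re-exports, any unported import of `DimensionGraph` now fails at import time, which doubles as a quick check for leftover usage. A sketch of the expected behavior on this commit:

```python
# Expected to fail after this commit: the name is no longer exported.
try:
    from lsst.daf.butler.dimensions import DimensionGraph  # noqa: F401
except ImportError:
    print("DimensionGraph is gone; use DimensionGroup and string names")
```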
(Diff for the remaining 40 changed files not shown.)