Merge pull request #1036 from lsst/tickets/DM-41326
DM-41326: remove DimensionGraph and related interfaces
TallJimbo committed Jul 25, 2024
2 parents fd5547a + 73ae713 commit cdbbd14
Showing 45 changed files with 343 additions and 1,570 deletions.
3 changes: 3 additions & 0 deletions doc/changes/DM-41326.removal.md
@@ -0,0 +1,3 @@
+Remove `DimensionGraph` and the `Mapping` interface to `DataCoordinate`, along with most other public interfaces that utilize `DimensionElement` instances instead of just their string names.
+
+See [RFC-834](https://rubinobs.atlassian.net/browse/RFC-834) for full details and rationale.
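For downstream code, the practical upshot is that dimension containers are now plain set-like collections of string names. A minimal migration sketch, assuming an existing repository at `repo` (the path and dimension names are placeholders, not part of this commit):

```python
from lsst.daf.butler import Butler

butler = Butler("repo")  # hypothetical repository path
universe = butler.dimensions

# Old (removed): DimensionGraph held Dimension objects, so callers
# typically reached for .names to get strings out.
# New: conform() returns a DimensionGroup that behaves as a set of names.
group = universe.conform(["instrument", "detector"])
print(group.required)       # string names, e.g. {'instrument', 'detector'}
print("detector" in group)  # membership tests use plain strings
```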
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/_dataset_ref.py
@@ -50,7 +50,7 @@
 from ._dataset_type import DatasetType, SerializedDatasetType
 from ._named import NamedKeyDict
 from .datastore.stored_file_info import StoredDatastoreItemInfo
-from .dimensions import DataCoordinate, DimensionGraph, DimensionUniverse, SerializedDataCoordinate
+from .dimensions import DataCoordinate, DimensionGroup, DimensionUniverse, SerializedDataCoordinate
 from .json import from_json_pydantic, to_json_pydantic
 from .persistence_context import PersistenceContextVars

@@ -363,7 +363,7 @@ def __hash__(self) -> int:
         return hash((self.datasetType, self.dataId, self.id))
 
     @property
-    def dimensions(self) -> DimensionGraph:
+    def dimensions(self) -> DimensionGroup:
         """Dimensions associated with the underlying `DatasetType`."""
         return self.datasetType.dimensions

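The change to `DatasetRef` is annotation-only: `ref.dimensions` now delegates to the dataset type's `DimensionGroup`. A hedged sketch of what that looks like at the call site, reusing `butler` from the sketch above (the `raw` dataset type and data ID values are made up):

```python
# Assumes a repo with a registered "raw" dataset type and default collections.
ref = butler.find_dataset("raw", instrument="HSC", detector=50, exposure=903334)
if ref is not None:
    print(ref.dimensions.names)          # e.g. {'instrument', 'detector', 'exposure'}
    print("exposure" in ref.dimensions)  # membership is by string name
```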
52 changes: 17 additions & 35 deletions python/lsst/daf/butler/_dataset_type.py
@@ -39,12 +39,12 @@
 
 from ._config_support import LookupKey
 from ._storage_class import StorageClass, StorageClassFactory
-from .dimensions import DimensionGraph, DimensionGroup, SerializedDimensionGraph
+from .dimensions import DimensionGroup
 from .json import from_json_pydantic, to_json_pydantic
 from .persistence_context import PersistenceContextVars
 
 if TYPE_CHECKING:
-    from .dimensions import Dimension, DimensionUniverse
+    from .dimensions import DimensionUniverse
     from .registry import Registry

@@ -59,7 +59,7 @@ class SerializedDatasetType(BaseModel):
 
     name: StrictStr
     storageClass: StrictStr | None = None
-    dimensions: SerializedDimensionGraph | list[StrictStr] | None = None
+    dimensions: list[StrictStr] | None = None
     parentStorageClass: StrictStr | None = None
     isCalibration: StrictBool = False

@@ -69,7 +69,7 @@ def direct(
         *,
         name: str,
         storageClass: str | None = None,
-        dimensions: list | dict | None = None,
+        dimensions: list | None = None,
         parentStorageClass: str | None = None,
         isCalibration: bool = False,
     ) -> SerializedDatasetType:
@@ -88,7 +88,7 @@
             The name of the dataset type.
         storageClass : `str` or `None`
             The name of the storage class.
-        dimensions : `list` or `dict` or `None`
+        dimensions : `list` or `None`
             The dimensions associated with this dataset type.
         parentStorageClass : `str` or `None`
             The parent storage class name if this is a component.
@@ -104,16 +104,7 @@
         key = (name, storageClass or "")
         if cache is not None and (type_ := cache.get(key, None)) is not None:
             return type_
-
-        serialized_dimensions: list[str] | None
-        match dimensions:
-            case list():
-                serialized_dimensions = dimensions
-            case dict():
-                serialized_dimensions = SerializedDimensionGraph.direct(**dimensions).names
-            case None:
-                serialized_dimensions = None
-
+        serialized_dimensions = dimensions if dimensions is not None else None
         node = cls.model_construct(
             name=name,
             storageClass=storageClass,
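After this simplification the serialized `dimensions` field is a plain list of names; the old `SerializedDimensionGraph` dict form is no longer accepted. A small sketch of the fast-path constructor under the new scheme (the dataset type and storage class names are illustrative assumptions):

```python
from lsst.daf.butler import SerializedDatasetType

serialized = SerializedDatasetType.direct(
    name="calexp",                                   # hypothetical dataset type
    storageClass="ExposureF",                        # assumed storage class name
    dimensions=["instrument", "visit", "detector"],  # must be a list now, not a dict
)
print(serialized.dimensions)  # ['instrument', 'visit', 'detector']
```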
@@ -148,11 +139,9 @@ class DatasetType:
         and underscores. Component dataset types should contain a single
         period separating the base dataset type name from the component name
         (and may be recursive).
-    dimensions : `DimensionGroup`, `DimensionGraph`, or \
-        `~collections.abc.Iterable` [ `Dimension` or `str` ]
+    dimensions : `DimensionGroup` or `~collections.abc.Iterable` [ `str` ]
         Dimensions used to label and relate instances of this `DatasetType`.
-        If not a `DimensionGraph` or `DimensionGroup`, ``universe`` must be
-        provided as well.
+        If not a `DimensionGroup`, ``universe`` must be provided as well.
     storageClass : `StorageClass` or `str`
         Instance of a `StorageClass` or name of `StorageClass` that defines
         how this `DatasetType` is persisted.
@@ -162,7 +151,7 @@
         is not a component.
     universe : `DimensionUniverse`, optional
         Set of all known dimensions, used to normalize ``dimensions`` if it
-        is not already a `DimensionGraph`.
+        is not already a `DimensionGroup`.
     isCalibration : `bool`, optional
         If `True`, this dataset type may be included in
         `~CollectionType.CALIBRATION` collections.
@@ -209,7 +198,7 @@ def nameWithComponent(datasetTypeName: str, componentName: str) -> str:
     def __init__(
         self,
         name: str,
-        dimensions: DimensionGroup | DimensionGraph | Iterable[Dimension | str],
+        dimensions: DimensionGroup | Iterable[str],
         storageClass: StorageClass | str,
         parentStorageClass: StorageClass | str | None = None,
         *,
@@ -221,9 +210,7 @@ def __init__(
         self._name = name
         universe = universe or getattr(dimensions, "universe", None)
         if universe is None:
-            raise ValueError(
-                "If dimensions is not a DimensionGroup or DimensionGraph, a universe must be provided."
-            )
+            raise ValueError("If dimensions is not a DimensionGroup, a universe must be provided.")
         self._dimensions = universe.conform(dimensions)
         if name in self._dimensions.universe.governor_dimensions:
             raise ValueError(f"Governor dimension name {name} cannot be used as a dataset type name.")
@@ -373,12 +360,12 @@ def name(self) -> str:
         return self._name
 
     @property
-    def dimensions(self) -> DimensionGraph:
-        """Return the dimensions of this dataset type (`DimensionGraph`).
+    def dimensions(self) -> DimensionGroup:
+        """Return the dimensions of this dataset type (`DimensionGroup`).
 
         The dimensions of a dataset type define the keys of its datasets' data IDs.
         """
-        return self._dimensions._as_graph()
+        return self._dimensions
 
     @property
     def storageClass(self) -> StorageClass:
@@ -744,14 +731,9 @@ def from_simple(
         if universe is None:
             # this is for mypy
             raise ValueError("Unable to determine a usable universe")
-
-        match simple.dimensions:
-            case list():
-                dimensions = universe.conform(simple.dimensions)
-            case SerializedDimensionGraph():
-                dimensions = universe.conform(simple.dimensions.names)
-            case None:
-                raise ValueError(f"Dimensions must be specified in {simple}")
+        if simple.dimensions is None:
+            raise ValueError(f"Dimensions must be specified in {simple}")
+        dimensions = universe.conform(simple.dimensions)
 
         newType = cls(
             name=simple.name,
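Taken together, these hunks mean a `DatasetType` is now built from either a `DimensionGroup` or an iterable of string names plus an explicit `universe`; `Dimension` objects are no longer valid input. A sketch under those rules (the repo path and `ExposureF` storage class are assumptions):

```python
from lsst.daf.butler import Butler, DatasetType

butler = Butler("repo")  # hypothetical repository
universe = butler.dimensions

# String names require a universe to normalize against...
flat = DatasetType(
    "flat",
    dimensions=["instrument", "detector", "physical_filter"],
    storageClass="ExposureF",  # assumed storage class
    universe=universe,
    isCalibration=True,
)

# ...while a DimensionGroup already carries its universe with it.
group = universe.conform(["instrument", "detector", "physical_filter"])
flat2 = DatasetType("flat", dimensions=group, storageClass="ExposureF", isCalibration=True)
assert flat == flat2
```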
15 changes: 5 additions & 10 deletions python/lsst/daf/butler/_registry_shim.py
@@ -36,14 +36,11 @@
 from ._dataset_association import DatasetAssociation
 from ._dataset_ref import DatasetId, DatasetIdGenEnum, DatasetRef
 from ._dataset_type import DatasetType
-from ._named import NameLookupMapping
 from ._timespan import Timespan
 from .dimensions import (
     DataCoordinate,
     DataId,
-    Dimension,
-    DimensionElement,
-    DimensionGraph,
     DimensionGroup,
     DimensionRecord,
     DimensionUniverse,
@@ -248,15 +245,14 @@ def expandDataId(
         self,
         dataId: DataId | None = None,
         *,
-        dimensions: Iterable[str] | DimensionGroup | DimensionGraph | None = None,
-        graph: DimensionGraph | None = None,
-        records: NameLookupMapping[DimensionElement, DimensionRecord | None] | None = None,
+        dimensions: Iterable[str] | DimensionGroup | None = None,
+        records: Mapping[str, DimensionRecord | None] | None = None,
         withDefaults: bool = True,
         **kwargs: Any,
     ) -> DataCoordinate:
         # Docstring inherited from a base class.
         return self._registry.expandDataId(
-            dataId, dimensions=dimensions, graph=graph, records=records, withDefaults=withDefaults, **kwargs
+            dataId, dimensions=dimensions, records=records, withDefaults=withDefaults, **kwargs
         )
 
     def insertDimensionData(
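With the `graph` argument gone and `records` keyed by element name rather than `DimensionElement`, expanding a data ID through the shim looks like this sketch (`butler` as above; the data ID values are made up):

```python
registry = butler.registry
data_id = registry.expandDataId(instrument="HSC", detector=50)  # hypothetical values
# Attached dimension records are now looked up by string element name.
print(data_id.records["detector"])
```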
@@ -310,7 +306,7 @@ def queryDatasets(
         datasetType: Any,
         *,
         collections: CollectionArgType | None = None,
-        dimensions: Iterable[Dimension | str] | None = None,
+        dimensions: Iterable[str] | None = None,
         dataId: DataId | None = None,
         where: str = "",
         findFirst: bool = False,
@@ -335,8 +331,7 @@
 
     def queryDataIds(
         self,
-        # TODO: Drop Dimension support on DM-41326.
-        dimensions: DimensionGroup | Iterable[Dimension | str] | Dimension | str,
+        dimensions: DimensionGroup | Iterable[str] | str,
         *,
         dataId: DataId | None = None,
         datasets: Any = None,
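The query shims follow suit: `queryDataIds` and `queryDatasets` take dimension names as plain strings or a `DimensionGroup`. A hedged usage sketch, with `registry` as in the previous sketch (collection and constraint values are placeholders):

```python
# Data IDs for all visit/detector combinations matching a constraint.
data_ids = registry.queryDataIds(["visit", "detector"], instrument="HSC")

# Dataset queries constrained by a where expression.
refs = registry.queryDatasets(
    "calexp",
    collections=["HSC/runs/example"],  # hypothetical collection
    where="detector = 50 AND instrument = 'HSC'",
)
```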
12 changes: 5 additions & 7 deletions python/lsst/daf/butler/datastore/file_templates.py
@@ -43,7 +43,7 @@
 from .._dataset_ref import DatasetId, DatasetRef
 from .._exceptions import ValidationError
 from .._storage_class import StorageClass
-from ..dimensions import DataCoordinate, DimensionGraph, DimensionGroup
+from ..dimensions import DataCoordinate, DimensionGroup
 
 if TYPE_CHECKING:
     from .._dataset_type import DatasetType
@@ -394,9 +394,7 @@ def __str__(self) -> str:
     def __repr__(self) -> str:
         return f'{self.__class__.__name__}("{self.template}")'
 
-    def grouped_fields(
-        self, dimensions: DimensionGroup | DimensionGraph | None = None
-    ) -> tuple[FieldDict, FieldDict]:
+    def grouped_fields(self, dimensions: DimensionGroup | None = None) -> tuple[FieldDict, FieldDict]:
         """Return all the fields, grouped by their type.
 
         Parameters
@@ -723,7 +721,7 @@ def validateTemplate(self, entity: DatasetRef | DatasetType | StorageClass | Non
         -----
         Validation will always include a check that mandatory fields
         are present and that at least one field refers to a dimension.
-        If the supplied entity includes a `DimensionGraph` then it will be
+        If the supplied entity includes a `DimensionGroup` then it will be
         used to compare the available dimensions with those specified in the
         template.
         """
@@ -821,7 +819,7 @@ def validateTemplate(self, entity: DatasetRef | DatasetType | StorageClass | Non
         # Fall back to dataId keys if we have them but no links.
         # dataId keys must still be present in the template
         try:
-            minimal = set(entity.dimensions.required.names)
+            minimal = set(entity.dimensions.required)
             maximal = set(entity.dimensions.names)
         except AttributeError:
             try:
@@ -890,5 +888,5 @@ def _determine_skypix_alias(self, entity: DatasetRef | DatasetType) -> str | Non
         # update to be more like the real world while still providing our
         # only tests of important behavior.
         if len(entity.dimensions.skypix) == 1:
-            (alias,) = entity.dimensions.skypix.names
+            (alias,) = entity.dimensions.skypix
         return alias
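Both template fixes are the same pattern: `DimensionGroup.required` and `.skypix` already iterate as string names, so the intermediate `.names` hop disappears. A tiny sketch, with `universe` as in the earlier sketches (assuming it defines an `htm7` skypix dimension):

```python
group = universe.conform(["htm7"])  # hypothetical skypix dimension name
(alias,) = group.skypix             # unpacks a str directly, no .names needed
minimal = set(group.required)       # required dimension names as plain strings
print(alias, minimal)
```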
1 change: 0 additions & 1 deletion python/lsst/daf/butler/dimensions/__init__.py
@@ -40,7 +40,6 @@
 from ._database import *
 from ._elements import *
 from ._governor import *
-from ._graph import *
 from ._group import *
 from ._packer import *
 from ._record_set import *
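Because `._graph` is dropped from the package's re-exports, any unported import of `DimensionGraph` now fails at import time, which doubles as a quick check for leftover usage. A sketch of the expected behavior on this commit:

```python
# Expected to fail after this commit: the name is no longer exported.
try:
    from lsst.daf.butler.dimensions import DimensionGraph  # noqa: F401
except ImportError:
    print("DimensionGraph is gone; use DimensionGroup and string names")
```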
(Diff for the remaining 40 changed files not shown.)