Skip to content

Commit

Permalink
Remove datastore records from serialized DatasetRef (DM-41226)
Browse files Browse the repository at this point in the history
This removes the piece of code for (de-)serializing datastore records in
DatasetRef that was added on DM-40053. We worry that a serialized QuantumGraph
could grow in size, though presently we do not fill datastore records when
we make refs for a quantum graph. In the future the whole thing will be
replaced by more efficient data structures, so this serialization will
likely disappear entirely.
  • Loading branch information
andy-slac committed Oct 18, 2023
1 parent 9d5d565 commit 674a7e3
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 28 deletions.
25 changes: 0 additions & 25 deletions python/lsst/daf/butler/_dataset_ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,10 +182,6 @@ def makeDatasetId(
# This is constant, so don't recreate a set for each instance
_serializedDatasetRefFieldsSet = {"id", "datasetType", "dataId", "run", "component"}

# Serialized representation of StoredDatastoreItemInfo collection, first item
# is the record class name.
_DatastoreRecords: TypeAlias = tuple[str, list[Mapping[str, Any]]]


class SerializedDatasetRef(_BaseModelCompat):
"""Simplified model of a `DatasetRef` suitable for serialization."""
Expand All @@ -195,8 +191,6 @@ class SerializedDatasetRef(_BaseModelCompat):
dataId: SerializedDataCoordinate | None = None
run: StrictStr | None = None
component: StrictStr | None = None
datastore_records: Mapping[str, _DatastoreRecords] | None = None
"""Maps opaque table name to datastore records."""

if PYDANTIC_V2:
# Cannot use "after" validator since in some cases the validator
Expand Down Expand Up @@ -237,7 +231,6 @@ def direct(
datasetType: dict[str, Any] | None = None,
dataId: dict[str, Any] | None = None,
component: str | None = None,
datastore_records: Mapping[str, _DatastoreRecords] | None = None,
) -> SerializedDatasetRef:
"""Construct a `SerializedDatasetRef` directly without validators.
Expand All @@ -264,7 +257,6 @@ def direct(
dataId=serialized_dataId,
run=sys.intern(run),
component=component,
datastore_records=datastore_records,
)

return node
Expand Down Expand Up @@ -430,19 +422,11 @@ def to_simple(self, minimal: bool = False) -> SerializedDatasetRef:
simple["component"] = self.datasetType.component()
return SerializedDatasetRef(**simple)

datastore_records: Mapping[str, _DatastoreRecords] | None = None
if self._datastore_records is not None:
datastore_records = {}
for opaque_name, records in self._datastore_records.items():
class_name, record_dicts = StoredDatastoreItemInfo.to_records(records)
datastore_records[opaque_name] = class_name, list(record_dicts)

return SerializedDatasetRef(
datasetType=self.datasetType.to_simple(minimal=minimal),
dataId=self.dataId.to_simple(),
run=self.run,
id=self.id,
datastore_records=datastore_records,
)

@classmethod
Expand Down Expand Up @@ -537,20 +521,11 @@ def from_simple(
f"Encountered with {simple!r}{dstr}."
)

# rebuild datastore records
datastore_records: DatasetDatastoreRecords | None = None
if simple.datastore_records is not None:
datastore_records = {}
for opaque_name, (class_name, records) in simple.datastore_records.items():
infos = StoredDatastoreItemInfo.from_records(class_name, records)
datastore_records[opaque_name] = infos

newRef = cls(
datasetType,
dataId,
id=simple.id,
run=simple.run,
datastore_records=datastore_records,
)
if cache is not None:
cache[key] = newRef
Expand Down
6 changes: 3 additions & 3 deletions tests/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,13 +709,13 @@ def testJson(self) -> None:
s = ref.to_json()
self.assertEqual(DatasetRef.from_json(s, universe=self.universe), ref)

# Also test ref with datastore records
# Also test ref with datastore records; serialization does not
# preserve those.
ref = self._make_datastore_records(ref, "/path1", "/path2")
s = ref.to_json()
ref2 = DatasetRef.from_json(s, universe=self.universe)
self.assertEqual(ref2, ref)
self.assertIsNotNone(ref2._datastore_records)
self.assertEqual(ref2._datastore_records, ref._datastore_records)
self.assertIsNone(ref2._datastore_records)

def testFileDataset(self) -> None:
ref = DatasetRef(self.datasetType, self.dataId, run="somerun")
Expand Down

0 comments on commit 674a7e3

Please sign in to comment.