Skip to content

Commit

Permalink
Merge pull request #907 from lsst/tickets/DM-41724
Browse files Browse the repository at this point in the history
DM-41724: make dimension record construction more strict
  • Loading branch information
TallJimbo authored Nov 17, 2023
2 parents 8e2519f + 32800dc commit f22234c
Show file tree
Hide file tree
Showing 12 changed files with 111 additions and 72 deletions.
5 changes: 5 additions & 0 deletions doc/changes/DM-41724.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Stop accepting and ignoring unrecognized keyword arguments in `DimensionRecord` constructors.

Passing an invalid field to a `DimensionRecord` now raises `TypeError`.

This also prevents `DimensionRecord` construction from reinterpreting `timespan=None` as `timespan=Timespan(None, None)`.
15 changes: 9 additions & 6 deletions python/lsst/daf/butler/_timespan.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ def isEmpty(self) -> bool:
def __str__(self) -> str:
if self.isEmpty():
return "(empty)"
fmt = "%Y-%m-%dT%H:%M:%S"
# Trap dubious year warnings in case we have timespans from
# simulated data in the future
with warnings.catch_warnings():
Expand All @@ -289,12 +290,12 @@ def __str__(self) -> str:
head = "(-∞, "
else:
assert isinstance(self.begin, astropy.time.Time), "guaranteed by earlier checks and ctor"
head = f"[{self.begin.tai.isot}, "
head = f"[{self.begin.tai.strftime(fmt)}, "
if self.end is None:
tail = "∞)"
else:
assert isinstance(self.end, astropy.time.Time), "guaranteed by earlier checks and ctor"
tail = f"{self.end.tai.isot})"
tail = f"{self.end.tai.strftime(fmt)})"
return head + tail

def __repr__(self) -> str:
Expand Down Expand Up @@ -507,17 +508,17 @@ def to_simple(self, minimal: bool = False) -> list[int]:
@classmethod
def from_simple(
cls,
simple: list[int],
simple: list[int] | None,
universe: DimensionUniverse | None = None,
registry: Registry | None = None,
) -> Timespan:
) -> Timespan | None:
"""Construct a new object from simplified form.
Designed to use the data returned from the `to_simple` method.
Parameters
----------
simple : `list` of `int`
simple : `list` of `int`, or `None`
The values returned by `to_simple()`.
universe : `DimensionUniverse`, optional
Unused.
Expand All @@ -526,9 +527,11 @@ def from_simple(
Returns
-------
result : `Timespan`
result : `Timespan` or `None`
Newly-constructed object.
"""
if simple is None:
return None
nsec1, nsec2 = simple # for mypy
return cls(begin=None, end=None, _nsec=(nsec1, nsec2))

Expand Down
35 changes: 23 additions & 12 deletions python/lsst/daf/butler/dimensions/_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def _createSimpleRecordSubclass(definition: DimensionElement) -> type[SpecificSe
field_type = Optional[field_type] # type: ignore
members[field.name] = (field_type, ...)
if definition.temporal:
members["timespan"] = (Tuple[int, int], ...) # type: ignore
members["timespan"] = (Optional[Tuple[int, int]], ...) # type: ignore
if definition.spatial:
members["region"] = (str, ...)

Expand Down Expand Up @@ -277,17 +277,6 @@ def __init__(self, **kwargs: Any):
"Multiple inconsistent values for "
f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
)
for name in self.__slots__:
object.__setattr__(self, name, kwargs.get(name))
if self.definition.temporal is not None and self.timespan is None:
object.__setattr__(
self,
"timespan",
Timespan(
kwargs.get("datetime_begin"),
kwargs.get("datetime_end"),
),
)

from ._coordinate import DataCoordinate

Expand All @@ -299,6 +288,28 @@ def __init__(self, **kwargs: Any):
tuple(kwargs[dimension] for dimension in self.definition.required.names),
),
)
# Don't need the primary key value aliased to the dimension name
# anymore.
kwargs.pop(self.definition.name, None)

for name in self.__slots__:
# Note that we remove from kwargs as we go, to make sure there's
# nothing left at the end.
object.__setattr__(self, name, kwargs.pop(name, None))
# Support 'datetime_begin' and 'datetime_end' instead of 'timespan' for
# backwards compatibility, but if one is provided both must be.
if self.definition.temporal is not None and self.timespan is None and "datetime_begin" in kwargs:
object.__setattr__(
self,
"timespan",
Timespan(
kwargs.pop("datetime_begin"),
kwargs.pop("datetime_end"),
),
)

if kwargs:
raise TypeError(f"Invalid fields for {self.definition} dimension record: {set(kwargs.keys())}.")

def __eq__(self, other: Any) -> bool:
if type(other) != type(self):
Expand Down
26 changes: 11 additions & 15 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,11 +375,11 @@ def testDataIdRelationships(self):
)
registry.insertDimensionData(
"visit",
{"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
{"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G"},
)
registry.insertDimensionData(
"visit_definition",
{"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
{"instrument": "Cam1", "visit": 1, "exposure": 1},
)
with self.assertRaises(InconsistentDataIdError):
registry.expandDataId(
Expand Down Expand Up @@ -981,15 +981,11 @@ def testInstrumentDimensions(self):
registry.insertDimensionData(
"detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
)
registry.insertDimensionData(
"visit_system",
dict(instrument="DummyCam", id=1, name="default"),
)
registry.insertDimensionData(
"visit",
dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
)
for i in range(1, 6):
registry.insertDimensionData(
Expand All @@ -1009,12 +1005,12 @@ def testInstrumentDimensions(self):
)
registry.insertDimensionData(
"visit_definition",
dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
dict(instrument="DummyCam", exposure=100, visit=10),
dict(instrument="DummyCam", exposure=101, visit=10),
dict(instrument="DummyCam", exposure=110, visit=11),
dict(instrument="DummyCam", exposure=111, visit=11),
dict(instrument="DummyCam", exposure=200, visit=20),
dict(instrument="DummyCam", exposure=201, visit=20),
)
# dataset types
run1 = "test1_r"
Expand Down
19 changes: 13 additions & 6 deletions python/lsst/daf/butler/script/queryDimensionRecords.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from typing import Any

from astropy.table import Table
from lsst.sphgeom import Region

from .._butler import Butler
from .._timespan import Timespan
Expand Down Expand Up @@ -78,13 +79,19 @@ def queryDimensionRecords(
# use the dataId to sort the rows if not ordered already
records.sort(key=lambda r: r.dataId)

keys = records[0].fields.names # order the columns the same as the record's `field.names`
# order the columns the same as the record's `field.names`, and add units
# to timespans
keys = records[0].fields.names
headers = ["timespan (TAI)" if name == "timespan" else name for name in records[0].fields.names]

def conform(v: Any) -> Any:
if isinstance(v, Timespan):
v = (v.begin, v.end)
elif isinstance(v, bytes):
v = "0x" + v.hex()
match v:
case Timespan():
v = str(v)
case bytes():
v = "0x" + v.hex()
case Region():
v = "(elided)"
return v

return Table([[conform(getattr(record, key, None)) for record in records] for key in keys], names=keys)
return Table([[conform(getattr(record, key, None)) for record in records] for key in keys], names=headers)
5 changes: 2 additions & 3 deletions python/lsst/daf/butler/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
import astropy
from astropy.table import Table as AstropyTable

from .. import Butler, Config, StorageClassFactory
from .. import Butler, Config, StorageClassFactory, Timespan
from ..registry import CollectionType
from ..tests import MetricsExample, addDatasetType

Expand Down Expand Up @@ -268,8 +268,7 @@ def __init__(self, root: str, configFile: str) -> None:
id=423,
name="fourtwentythree",
physical_filter="d-r",
datetimeBegin=visitStart,
datetimeEnd=visitEnd,
timespan=Timespan(visitStart, visitEnd),
),
)
self.butler.registry.insertDimensionData(
Expand Down
42 changes: 32 additions & 10 deletions python/lsst/daf/butler/transfers/_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,18 @@ def __init__(self, stream: IO, registry: SqlRegistry):
):
migrate_visit_system = True

# Drop "seeing" from visits in files older than version 1.
migrate_visit_seeing = False
if (
universe_version < 1
and universe_namespace == "daf_butler"
and "visit" in self.registry.dimensions
and "seeing" not in self.registry.dimensions["visit"].metadata
):
migrate_visit_seeing = True

datasetData = []
RecordClass: type[DimensionRecord]
for data in wrapper["data"]:
if data["type"] == "dimension":
# convert all datetime values to astropy
Expand All @@ -296,19 +307,30 @@ def __init__(self, stream: IO, registry: SqlRegistry):
# class with special YAML tag.
if isinstance(record[key], datetime):
record[key] = astropy.time.Time(record[key], scale="utc")

if data["element"] == "visit":
if migrate_visit_system:
# Must create the visit_system_membership records.
# But first create empty list for visits since other
# logic in this file depends on self.dimensions being
# populated in an order consisteny with primary keys.
self.dimensions[self.registry.dimensions["visit"]] = []
element = self.registry.dimensions["visit_system_membership"]
RecordClass = element.RecordClass
self.dimensions[element].extend(
RecordClass(
instrument=r["instrument"], visit_system=r.pop("visit_system"), visit=r["id"]
)
for r in data["records"]
)
if migrate_visit_seeing:
for record in data["records"]:
record.pop("seeing", None)

element = self.registry.dimensions[data["element"]]
RecordClass: type[DimensionRecord] = element.RecordClass
RecordClass = element.RecordClass
self.dimensions[element].extend(RecordClass(**r) for r in data["records"])

if data["element"] == "visit" and migrate_visit_system:
# Must create the visit_system_membership records.
element = self.registry.dimensions["visit_system_membership"]
RecordClass = element.RecordClass
self.dimensions[element].extend(
RecordClass(instrument=r["instrument"], visit_system=r["visit_system"], visit=r["id"])
for r in data["records"]
)

elif data["type"] == "collection":
collectionType = CollectionType.from_name(data["collection_type"])
if collectionType is CollectionType.RUN:
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pyyaml >= 5.1
astropy >= 4.0
astropy >= 5.2
click >7.0
sqlalchemy >= 1.4
pydantic <3.0
Expand Down
2 changes: 0 additions & 2 deletions tests/test_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,6 @@ def create_butler(
"id": 423,
"name": "fourtwentythree",
"physical_filter": "d-r",
"visit_system": 1,
"datetime_begin": visit_start,
"datetime_end": visit_end,
},
Expand All @@ -255,7 +254,6 @@ def create_butler(
"id": visit_id,
"name": f"fourtwentyfour_{visit_id}",
"physical_filter": "d-r",
"visit_system": 1,
},
)
return butler, datasetType
Expand Down
23 changes: 9 additions & 14 deletions tests/test_cliCmdQueryDimensionRecords.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,7 @@
import os
import unittest

import astropy
from astropy.table import Table as AstropyTable
from astropy.utils.introspection import minversion
from lsst.daf.butler import Butler, StorageClassFactory
from lsst.daf.butler.cli.butler import cli as butlerCli
from lsst.daf.butler.cli.utils import LogCliRunner, clickResultMsg
Expand All @@ -48,10 +46,6 @@

TESTDIR = os.path.abspath(os.path.dirname(__file__))

# Astropy changed the handling of numpy columns in v5.1 so anything
# greater than 5.0 (two digit version) does not need the annotated column.
timespan_columns = "" if minversion(astropy, "5.1") else " [2]"


class QueryDimensionRecordsTest(unittest.TestCase, ButlerTestHelper):
"""Test the query-dimension-records command-line."""
Expand All @@ -75,7 +69,7 @@ class QueryDimensionRecordsTest(unittest.TestCase, ButlerTestHelper):
"azimuth",
"zenith_angle",
"region",
f"timespan{timespan_columns}",
"timespan (TAI)",
)

def setUp(self):
Expand Down Expand Up @@ -107,7 +101,7 @@ def testBasic(self):
"None",
"None",
"None",
"None .. None",
"[2020-01-01T08:00:00, 2020-01-01T08:00:36)",
),
(
"DummyCamComp",
Expand All @@ -123,12 +117,13 @@ def testBasic(self):
"None",
"None",
"None",
"None .. None",
"None",
),
)
)
expected = AstropyTable(rows, names=self.expectedColumnNames)
self.assertAstropyTablesEqual(readTable(result.output), expected)
got = readTable(result.output)
self.assertAstropyTablesEqual(got, expected)

def testWhere(self):
result = self.runner.invoke(
Expand Down Expand Up @@ -158,7 +153,7 @@ def testWhere(self):
"None",
"None",
"None",
"None .. None",
"[2020-01-01T08:00:00, 2020-01-01T08:00:36)",
),
)
)
Expand Down Expand Up @@ -212,7 +207,7 @@ def testCollection(self):
"None",
"None",
"None",
"None .. None",
"[2020-01-01T08:00:00, 2020-01-01T08:00:36)",
),
(
"DummyCamComp",
Expand All @@ -228,7 +223,7 @@ def testCollection(self):
"None",
"None",
"None",
"None .. None",
"None",
),
)
)
Expand Down Expand Up @@ -265,7 +260,7 @@ def testCollection(self):
"None",
"None",
"None",
"None .. None",
"None",
),
)
)
Expand Down
Loading

0 comments on commit f22234c

Please sign in to comment.