DM-41724: make dimension record construction more strict #907

Merged: 8 commits, Nov 17, 2023
5 changes: 5 additions & 0 deletions doc/changes/DM-41724.bugfix.md
@@ -0,0 +1,5 @@
Stop accepting and ignoring unrecognized keyword arguments in `DimensionRecord` constructors.

Passing an invalid field to a `DimensionRecord` now raises `TypeError`.

This also prevents `DimensionRecord` construction from reinterpreting `timespan=None` as `timespan=Timespan(None, None)`.
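
A minimal sketch of the new behavior (assumes the default `DimensionUniverse`; the `chip_count` field is hypothetical and not a real `instrument` field):

```python
from lsst.daf.butler import DimensionUniverse

universe = DimensionUniverse()  # default daf_butler dimension configuration
InstrumentRecord = universe["instrument"].RecordClass

InstrumentRecord(name="Cam1")                 # recognized field: accepted as before
InstrumentRecord(name="Cam1", chip_count=4)   # unrecognized field: now raises TypeError
```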
15 changes: 9 additions & 6 deletions python/lsst/daf/butler/_timespan.py
@@ -279,6 +279,7 @@ def isEmpty(self) -> bool:
def __str__(self) -> str:
if self.isEmpty():
return "(empty)"
fmt = "%Y-%m-%dT%H:%M:%S"
# Trap dubious year warnings in case we have timespans from
# simulated data in the future
with warnings.catch_warnings():
@@ -289,12 +290,12 @@ def __str__(self) -> str:
head = "(-∞, "
else:
assert isinstance(self.begin, astropy.time.Time), "guaranteed by earlier checks and ctor"
head = f"[{self.begin.tai.isot}, "
head = f"[{self.begin.tai.strftime(fmt)}, "
if self.end is None:
tail = "∞)"
else:
assert isinstance(self.end, astropy.time.Time), "guaranteed by earlier checks and ctor"
tail = f"{self.end.tai.isot})"
tail = f"{self.end.tai.strftime(fmt)})"
return head + tail
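
For illustration, a rough sketch of the new rendering (timestamps borrowed from the CLI test expectations later in this diff; not part of the change itself):

```python
import astropy.time
from lsst.daf.butler import Timespan

begin = astropy.time.Time("2020-01-01T08:00:00", scale="tai")
end = astropy.time.Time("2020-01-01T08:00:36", scale="tai")

print(Timespan(begin, end))   # [2020-01-01T08:00:00, 2020-01-01T08:00:36)
print(Timespan(None, end))    # (-∞, 2020-01-01T08:00:36)
```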

def __repr__(self) -> str:
@@ -507,17 +508,17 @@ def to_simple(self, minimal: bool = False) -> list[int]:
@classmethod
def from_simple(
cls,
simple: list[int],
simple: list[int] | None,
universe: DimensionUniverse | None = None,
registry: Registry | None = None,
) -> Timespan:
) -> Timespan | None:
"""Construct a new object from simplified form.

Designed to use the data returned from the `to_simple` method.

Parameters
----------
simple : `list` of `int`
simple : `list` of `int`, or `None`
The values returned by `to_simple()`.
universe : `DimensionUniverse`, optional
Unused.
@@ -526,9 +527,11 @@

Returns
-------
result : `Timespan`
result : `Timespan` or `None`
Newly-constructed object.
"""
if simple is None:
return None
nsec1, nsec2 = simple # for mypy
return cls(begin=None, end=None, _nsec=(nsec1, nsec2))
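
A brief sketch of the widened signature (round-tripping through `to_simple`, which is the intended use per the docstring):

```python
from lsst.daf.butler import Timespan

ts = Timespan(None, None)                        # fully unbounded timespan
restored = Timespan.from_simple(ts.to_simple())  # list[int] -> Timespan, as before

# None now passes through instead of failing to unpack.
assert Timespan.from_simple(None) is None
```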

35 changes: 23 additions & 12 deletions python/lsst/daf/butler/dimensions/_records.py
@@ -109,7 +109,7 @@
field_type = Optional[field_type] # type: ignore
members[field.name] = (field_type, ...)
if definition.temporal:
members["timespan"] = (Tuple[int, int], ...) # type: ignore
members["timespan"] = (Optional[Tuple[int, int]], ...) # type: ignore
if definition.spatial:
members["region"] = (str, ...)

@@ -277,17 +277,6 @@
"Multiple inconsistent values for "
f"{self.definition.name}.{self.definition.primaryKey.name}: {v!r} != {v2!r}."
)
for name in self.__slots__:
object.__setattr__(self, name, kwargs.get(name))
if self.definition.temporal is not None and self.timespan is None:
object.__setattr__(
self,
"timespan",
Timespan(
kwargs.get("datetime_begin"),
kwargs.get("datetime_end"),
),
)

from ._coordinate import DataCoordinate

@@ -299,6 +288,28 @@
tuple(kwargs[dimension] for dimension in self.definition.required.names),
),
)
# Don't need the primary key value aliased to the dimension name
# anymore.
kwargs.pop(self.definition.name, None)

for name in self.__slots__:
# Note that we remove from kwargs as we go, to make sure there's
# nothing left at the end.
object.__setattr__(self, name, kwargs.pop(name, None))
# Support 'datetime_begin' and 'datetime_end' instead of 'timespan' for
# backwards compatibility, but if one is provided both must be.
if self.definition.temporal is not None and self.timespan is None and "datetime_begin" in kwargs:
object.__setattr__(
self,
"timespan",
Timespan(
kwargs.pop("datetime_begin"),
kwargs.pop("datetime_end"),
),
)
Comment on lines +301 to +309

Contributor:
There's a small change in behavior here, not sure if it is intended to enforce that any Timespan here will always be bounded or not.
Previously if only datetime_begin was passed without also including datetime_end, it would create a Timespan without an upper bound. Now it throws KeyError in this case instead.

Member (author):
It was intentional, but the reader shouldn't have to wonder about that; I'll add a code comment saying that if you pass one you're required to pass both.

if kwargs:
raise TypeError(f"Invalid fields for {self.definition} dimension record: {set(kwargs.keys())}.")
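
A hedged sketch of the legacy-keyword path (assumes the default universe's `visit` definition; the field values mirror the registry tests in this PR):

```python
import astropy.time
from lsst.daf.butler import DimensionUniverse

universe = DimensionUniverse()
VisitRecord = universe["visit"].RecordClass

begin = astropy.time.Time("2020-01-01T08:00:00", scale="tai")
end = astropy.time.Time("2020-01-01T08:00:36", scale="tai")

# Both legacy keywords together are still accepted and folded into a Timespan.
VisitRecord(
    instrument="Cam1", id=1, name="one", physical_filter="Cam1-G",
    datetime_begin=begin, datetime_end=end,
)

# Passing only one of them is no longer supported: datetime_begin without
# datetime_end now raises KeyError (see the review discussion above).
VisitRecord(
    instrument="Cam1", id=1, name="one", physical_filter="Cam1-G",
    datetime_begin=begin,
)
```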

Codecov warning: added line python/lsst/daf/butler/dimensions/_records.py#L312 was not covered by tests.

def __eq__(self, other: Any) -> bool:
if type(other) != type(self):
26 changes: 11 additions & 15 deletions python/lsst/daf/butler/registry/tests/_registry.py
@@ -375,11 +375,11 @@ def testDataIdRelationships(self):
)
registry.insertDimensionData(
"visit",
{"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G", "visit_system": 0},
{"instrument": "Cam1", "id": 1, "name": "one", "physical_filter": "Cam1-G"},
)
registry.insertDimensionData(
"visit_definition",
{"instrument": "Cam1", "visit": 1, "exposure": 1, "visit_system": 0},
{"instrument": "Cam1", "visit": 1, "exposure": 1},
)
with self.assertRaises(InconsistentDataIdError):
registry.expandDataId(
@@ -981,15 +981,11 @@ def testInstrumentDimensions(self):
registry.insertDimensionData(
"detector", *[dict(instrument="DummyCam", id=i, full_name=str(i)) for i in range(1, 6)]
)
registry.insertDimensionData(
"visit_system",
dict(instrument="DummyCam", id=1, name="default"),
)
registry.insertDimensionData(
"visit",
dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i", visit_system=1),
dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r", visit_system=1),
dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r", visit_system=1),
dict(instrument="DummyCam", id=10, name="ten", physical_filter="dummy_i"),
dict(instrument="DummyCam", id=11, name="eleven", physical_filter="dummy_r"),
dict(instrument="DummyCam", id=20, name="twelve", physical_filter="dummy_r"),
)
for i in range(1, 6):
registry.insertDimensionData(
@@ -1009,12 +1005,12 @@
)
registry.insertDimensionData(
"visit_definition",
dict(instrument="DummyCam", exposure=100, visit_system=1, visit=10),
dict(instrument="DummyCam", exposure=101, visit_system=1, visit=10),
dict(instrument="DummyCam", exposure=110, visit_system=1, visit=11),
dict(instrument="DummyCam", exposure=111, visit_system=1, visit=11),
dict(instrument="DummyCam", exposure=200, visit_system=1, visit=20),
dict(instrument="DummyCam", exposure=201, visit_system=1, visit=20),
dict(instrument="DummyCam", exposure=100, visit=10),
dict(instrument="DummyCam", exposure=101, visit=10),
dict(instrument="DummyCam", exposure=110, visit=11),
dict(instrument="DummyCam", exposure=111, visit=11),
dict(instrument="DummyCam", exposure=200, visit=20),
dict(instrument="DummyCam", exposure=201, visit=20),
)
# dataset types
run1 = "test1_r"
19 changes: 13 additions & 6 deletions python/lsst/daf/butler/script/queryDimensionRecords.py
@@ -32,6 +32,7 @@
from typing import Any

from astropy.table import Table
from lsst.sphgeom import Region

from .._butler import Butler
from .._timespan import Timespan
@@ -78,13 +79,19 @@
# use the dataId to sort the rows if not ordered already
records.sort(key=lambda r: r.dataId)

keys = records[0].fields.names # order the columns the same as the record's `field.names`
# order the columns the same as the record's `fields.names`, and add units
# to timespans
keys = records[0].fields.names
headers = ["timespan (TAI)" if name == "timespan" else name for name in records[0].fields.names]

def conform(v: Any) -> Any:
if isinstance(v, Timespan):
v = (v.begin, v.end)
elif isinstance(v, bytes):
v = "0x" + v.hex()
match v:
case Timespan():
v = str(v)
case bytes():
v = "0x" + v.hex()
case Region():
v = "(elided)"

Codecov warning: added line python/lsst/daf/butler/script/queryDimensionRecords.py#L94 was not covered by tests.
return v

return Table([[conform(getattr(record, key, None)) for record in records] for key in keys], names=keys)
return Table([[conform(getattr(record, key, None)) for record in records] for key in keys], names=headers)
5 changes: 2 additions & 3 deletions python/lsst/daf/butler/tests/utils.py
@@ -39,7 +39,7 @@
import astropy
from astropy.table import Table as AstropyTable

from .. import Butler, Config, StorageClassFactory
from .. import Butler, Config, StorageClassFactory, Timespan
from ..registry import CollectionType
from ..tests import MetricsExample, addDatasetType

@@ -268,8 +268,7 @@ def __init__(self, root: str, configFile: str) -> None:
id=423,
name="fourtwentythree",
physical_filter="d-r",
datetimeBegin=visitStart,
datetimeEnd=visitEnd,
timespan=Timespan(visitStart, visitEnd),
),
)
self.butler.registry.insertDimensionData(
42 changes: 32 additions & 10 deletions python/lsst/daf/butler/transfers/_yaml.py
@@ -284,7 +284,18 @@ def __init__(self, stream: IO, registry: SqlRegistry):
):
migrate_visit_system = True

# Drop "seeing" from visits in files older than version 1.
migrate_visit_seeing = False
if (
universe_version < 1
and universe_namespace == "daf_butler"
and "visit" in self.registry.dimensions
and "seeing" not in self.registry.dimensions["visit"].metadata
):
migrate_visit_seeing = True

datasetData = []
RecordClass: type[DimensionRecord]
for data in wrapper["data"]:
if data["type"] == "dimension":
# convert all datetime values to astropy
@@ -296,19 +307,30 @@
# class with special YAML tag.
if isinstance(record[key], datetime):
record[key] = astropy.time.Time(record[key], scale="utc")

if data["element"] == "visit":
if migrate_visit_system:
# Must create the visit_system_membership records.
# But first create empty list for visits since other
# logic in this file depends on self.dimensions being
# populated in an order consistent with primary keys.
self.dimensions[self.registry.dimensions["visit"]] = []
element = self.registry.dimensions["visit_system_membership"]
RecordClass = element.RecordClass
self.dimensions[element].extend(
RecordClass(
instrument=r["instrument"], visit_system=r.pop("visit_system"), visit=r["id"]
)
for r in data["records"]
)
if migrate_visit_seeing:
for record in data["records"]:
record.pop("seeing", None)

element = self.registry.dimensions[data["element"]]
RecordClass: type[DimensionRecord] = element.RecordClass
RecordClass = element.RecordClass
self.dimensions[element].extend(RecordClass(**r) for r in data["records"])

if data["element"] == "visit" and migrate_visit_system:
# Must create the visit_system_membership records.
element = self.registry.dimensions["visit_system_membership"]
RecordClass = element.RecordClass
self.dimensions[element].extend(
RecordClass(instrument=r["instrument"], visit_system=r["visit_system"], visit=r["id"])
for r in data["records"]
)

elif data["type"] == "collection":
collectionType = CollectionType.from_name(data["collection_type"])
if collectionType is CollectionType.RUN:
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
pyyaml >= 5.1
astropy >= 4.0
astropy >= 5.2
click >7.0
sqlalchemy >= 1.4
pydantic <3.0
2 changes: 0 additions & 2 deletions tests/test_butler.py
@@ -240,7 +240,6 @@ def create_butler(
"id": 423,
"name": "fourtwentythree",
"physical_filter": "d-r",
"visit_system": 1,
"datetime_begin": visit_start,
"datetime_end": visit_end,
},
@@ -255,7 +254,6 @@
"id": visit_id,
"name": f"fourtwentyfour_{visit_id}",
"physical_filter": "d-r",
"visit_system": 1,
},
)
return butler, datasetType
23 changes: 9 additions & 14 deletions tests/test_cliCmdQueryDimensionRecords.py
@@ -31,9 +31,7 @@
import os
import unittest

import astropy
from astropy.table import Table as AstropyTable
from astropy.utils.introspection import minversion
from lsst.daf.butler import Butler, StorageClassFactory
from lsst.daf.butler.cli.butler import cli as butlerCli
from lsst.daf.butler.cli.utils import LogCliRunner, clickResultMsg
@@ -48,10 +46,6 @@

TESTDIR = os.path.abspath(os.path.dirname(__file__))

# Astropy changed the handling of numpy columns in v5.1 so anything
# greater than 5.0 (two digit version) does not need the annotated column.
timespan_columns = "" if minversion(astropy, "5.1") else " [2]"


class QueryDimensionRecordsTest(unittest.TestCase, ButlerTestHelper):
"""Test the query-dimension-records command-line."""
@@ -75,7 +69,7 @@
"azimuth",
"zenith_angle",
"region",
f"timespan{timespan_columns}",
"timespan (TAI)",
)

def setUp(self):
@@ -107,7 +101,7 @@ def testBasic(self):
"None",
"None",
"None",
"None .. None",
"[2020-01-01T08:00:00, 2020-01-01T08:00:36)",
),
(
"DummyCamComp",
Expand All @@ -123,12 +117,13 @@ def testBasic(self):
"None",
"None",
"None",
"None .. None",
"None",
),
)
)
expected = AstropyTable(rows, names=self.expectedColumnNames)
self.assertAstropyTablesEqual(readTable(result.output), expected)
got = readTable(result.output)
self.assertAstropyTablesEqual(got, expected)

def testWhere(self):
result = self.runner.invoke(
@@ -158,7 +153,7 @@ def testWhere(self):
"None",
"None",
"None",
"None .. None",
"[2020-01-01T08:00:00, 2020-01-01T08:00:36)",
),
)
)
@@ -212,7 +207,7 @@ def testCollection(self):
"None",
"None",
"None",
"None .. None",
"[2020-01-01T08:00:00, 2020-01-01T08:00:36)",
),
(
"DummyCamComp",
@@ -228,7 +223,7 @@
"None",
"None",
"None",
"None .. None",
"None",
),
)
)
@@ -265,7 +260,7 @@
"None",
"None",
"None",
"None .. None",
"None",
),
)
)