Skip to content

Commit

Permalink
Merge pull request #865 from lsst/tickets/DM-38084
Browse files Browse the repository at this point in the history
DM-38084: update docs and exception messages to improve timespan-querying usability
  • Loading branch information
TallJimbo authored Jul 14, 2023
2 parents f670e8b + dcf44d8 commit f8470df
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 17 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-38084.doc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix docs on referring to timespans in queries, and make related error messages more helpful.
6 changes: 3 additions & 3 deletions doc/lsst.daf.butler/queries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -425,11 +425,11 @@ Few examples of valid expressions using some of the constructs:
(visit = 100 OR visit = 101) AND exposure % 2 = 1
visit.datetime_begin > T'2020-03-30 12:20:33'
visit.timespan.begin > T'2020-03-30 12:20:33'
exposure.datetime_begin > T'58938.515'
exposure.timespan.begin > T'58938.515'
visit.datetime_end < T'mjd/58938.515/tai'
visit.timespan.end < T'mjd/58938.515/tai'
ingest_date < T'2020-11-06 21:10:00'
Expand Down
8 changes: 5 additions & 3 deletions python/lsst/daf/butler/cli/opt/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,11 @@ def _config_split(*args: Any) -> dict[str, str]:
"--order-by",
help=unwrap(
"""One or more comma-separated names used to order records. Names can be dimension names,
metadata names optionally prefixed by a dimension name and dot, or
timestamp_begin/timestamp_end (with optional dimension name). To reverse ordering for a name
prefix it with a minus sign."""
metadata field names, or "timespan.begin" / "timespan.end" for temporal dimensions.
In some cases the dimension for a metadata field or timespan bound can be inferred, but usually
qualifying these with "<dimension>.<field>" is necessary.
To reverse ordering for a name, prefix it with a minus sign.
"""
),
multiple=True,
callback=split_commas,
Expand Down
33 changes: 24 additions & 9 deletions python/lsst/daf/butler/registry/queries/expressions/categorize.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,17 @@ def categorizeElementId(universe: DimensionUniverse, name: str) -> tuple[Dimensi
so at least its message should generally be propagated up to a context
where the error can be interpreted by a human.
"""
table, sep, column = name.partition(".")
table, _, column = name.partition(".")
if column:
try:
element = universe[table]
except KeyError as err:
raise LookupError(f"No dimension element with name '{table}'.") from err
except KeyError:
if table == "timespan" or table == "datetime" or table == "timestamp":
raise LookupError(
"Dimension element name cannot be inferred in this context; "
f"use <dimension>.timespan.{column} instead."
) from None
raise LookupError(f"No dimension element with name {table!r} in {name!r}.") from None
if isinstance(element, Dimension) and column == element.primaryKey.name:
# Allow e.g. "visit.id = x" instead of just "visit = x"; this
# can be clearer.
Expand Down Expand Up @@ -172,8 +177,9 @@ def categorizeOrderByName(graph: DimensionGraph, name: str) -> tuple[DimensionEl
field_name = name
elif len(matches) > 1:
raise ValueError(
f"Timespan exists in more than one dimesion element: {matches},"
" qualify timespan with specific dimension name."
"Timespan exists in more than one dimension element "
f"({', '.join(element.name for element in matches)}); "
"qualify timespan with specific dimension name."
)
else:
raise ValueError(f"Cannot find any temporal dimension element for '{name}'.")
Expand All @@ -197,16 +203,21 @@ def categorizeOrderByName(graph: DimensionGraph, name: str) -> tuple[DimensionEl
field_name = name
elif len(match_pairs) > 1:
raise ValueError(
f"Metadata '{name}' exists in more than one dimension element: "
f"{[element for element, _ in match_pairs]}, qualify metadata name with dimension name."
f"Metadata '{name}' exists in more than one dimension element "
f"({', '.join(element.name for element, _ in match_pairs)}); "
"qualify field name with dimension name."
)
else:
raise ValueError(f"Metadata '{name}' cannot be found in any dimension.")
else:
# qualified name, must be a dimension element and a field
elem_name, _, field_name = name.partition(".")
if elem_name not in graph.elements.names:
raise ValueError(f"Unknown dimension element name '{elem_name}'")
if field_name == "begin" or field_name == "end":
raise ValueError(
f"Unknown dimension element {elem_name!r}; perhaps you meant 'timespan.{field_name}'?"
)
raise ValueError(f"Unknown dimension element {elem_name!r}.")
element = graph.elements[elem_name]
if field_name in ("timespan.begin", "timespan.end"):
if not element.temporal:
Expand Down Expand Up @@ -289,7 +300,11 @@ def categorizeElementOrderByName(element: DimensionElement, name: str) -> str |
# qualified name, must be a dimension element and a field
elem_name, _, field_name = name.partition(".")
if elem_name != element.name:
raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'")
if field_name == "begin" or field_name == "end":
extra = f"; perhaps you meant 'timespan.{field_name}'?"
else:
extra = "."
raise ValueError(f"Element name mismatch: '{elem_name}' instead of '{element}'{extra}")
if field_name in ("timespan.begin", "timespan.end"):
if not element.temporal:
raise ValueError(f"Cannot use '{field_name}' with non-temporal element '{element}'.")
Expand Down
33 changes: 31 additions & 2 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2765,6 +2765,18 @@ def testQueryResultSummaries(self):
self.assertTrue(messages)
self.assertTrue(any("no-purpose" in message for message in messages))

def testQueryDataIdsExpressionError(self):
"""Test error checking of 'where' expressions in queryDataIds."""
registry = self.makeRegistry()
self.loadData(registry, "base.yaml")
bind = {"time": astropy.time.Time("2020-01-01T01:00:00", format="isot", scale="tai")}
with self.assertRaisesRegex(LookupError, r"No dimension element with name 'foo' in 'foo\.bar'\."):
registry.queryDataIds(["detector"], where="foo.bar = 12")
with self.assertRaisesRegex(
LookupError, "Dimension element name cannot be inferred in this context."
):
registry.queryDataIds(["detector"], where="timespan.end < time", bind=bind)

def testQueryDataIdsOrderBy(self):
"""Test order_by and limit on result returned by queryDataIds()."""
registry = self.makeRegistry()
Expand Down Expand Up @@ -2858,7 +2870,7 @@ def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
list(do_query().order_by(order_by))

for order_by in ("undimension.name", "-undimension.name"):
with self.assertRaisesRegex(ValueError, "Unknown dimension element name 'undimension'"):
with self.assertRaisesRegex(ValueError, "Unknown dimension element 'undimension'"):
list(do_query().order_by(order_by))

for order_by in ("attract", "-attract"):
Expand All @@ -2868,7 +2880,11 @@ def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
with self.assertRaisesRegex(ValueError, "Metadata 'exposure_time' exists in more than one dimension"):
list(do_query(("exposure", "visit")).order_by("exposure_time"))

with self.assertRaisesRegex(ValueError, "Timespan exists in more than one dimesion"):
with self.assertRaisesRegex(
ValueError,
r"Timespan exists in more than one dimension element \(exposure, visit\); "
r"qualify timespan with specific dimension name\.",
):
list(do_query(("exposure", "visit")).order_by("timespan.begin"))

with self.assertRaisesRegex(
Expand All @@ -2882,6 +2898,11 @@ def do_query(dimensions=("visit", "tract"), datasets=None, collections=None):
with self.assertRaisesRegex(ValueError, "Field 'name' does not exist in 'tract'."):
list(do_query("tract").order_by("tract.name"))

with self.assertRaisesRegex(
ValueError, r"Unknown dimension element 'timestamp'; perhaps you meant 'timespan.begin'\?"
):
list(do_query("visit").order_by("timestamp.begin"))

def testQueryDataIdsGovernorExceptions(self):
"""Test exceptions raised by queryDataIds() for incorrect governors."""
registry = self.makeRegistry()
Expand Down Expand Up @@ -3001,6 +3022,14 @@ def do_query(element, datasets=None, collections=None):
with self.assertRaisesRegex(ValueError, "Field 'attract' does not exist in 'detector'."):
list(do_query("detector").order_by(order_by))

for order_by in ("timestamp.begin", "-timestamp.begin"):
with self.assertRaisesRegex(
ValueError,
r"Element name mismatch: 'timestamp' instead of 'visit'; "
r"perhaps you meant 'timespan.begin'\?",
):
list(do_query("visit").order_by(order_by))

def testQueryDimensionRecordsExceptions(self):
"""Test exceptions raised by queryDimensionRecords()."""
registry = self.makeRegistry()
Expand Down

0 comments on commit f8470df

Please sign in to comment.