Fix issue with "collection" dataset field
Fix an issue similar to the previous commit, where query_datasets would fail on Postgres 16 with the error 'psycopg2.errors.DatatypeMismatch: could not determine polymorphic type because input has type unknown'.

This was occurring when there was a single collection specified for the dataset search, causing a literal value for the "collection" dataset field to be included in an any_value aggregate function.
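
As a rough sketch of the failure mode (not the actual query-builder code; the table, column, and "RUN1" collection name below are hypothetical), an untyped SQLAlchemy literal inside the polymorphic any_value() aggregate is what Postgres 16 rejects, while the same literal with an explicit cast carries a concrete SQL type:

import sqlalchemy
from sqlalchemy.dialects import postgresql

metadata = sqlalchemy.MetaData()
dataset = sqlalchemy.Table(
    "dataset", metadata, sqlalchemy.Column("dataset_id", sqlalchemy.BigInteger)
)

# Without a cast, the literal reaches Postgres as a parameter of unknown type;
# any_value() is polymorphic, so Postgres 16 fails with DatatypeMismatch.
broken = sqlalchemy.select(
    dataset.c.dataset_id,
    sqlalchemy.func.any_value(sqlalchemy.literal("RUN1")),
).group_by(dataset.c.dataset_id)

# Casting the literal to String makes SQLAlchemy emit an explicit CAST, giving
# the aggregate's argument a concrete SQL type that Postgres can resolve.
fixed = sqlalchemy.select(
    dataset.c.dataset_id,
    sqlalchemy.func.any_value(sqlalchemy.literal("RUN1").cast(sqlalchemy.String)),
).group_by(dataset.c.dataset_id)

print(fixed.compile(dialect=postgresql.dialect()))

Compiling the "fixed" statement for the Postgres dialect shows the CAST(... AS VARCHAR) wrapping that the commit adds around the "collection" field value.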
dhirving committed Sep 13, 2024
1 parent 13da234 commit 1d5ca5a
Showing 2 changed files with 25 additions and 2 deletions.
@@ -674,7 +674,13 @@ def _finish_query_builder(
             only_collection_record = collections[0]
             sql_projection.joiner.where(collection_col == only_collection_record.key)
             if "collection" in fields:
-                fields_provided["collection"] = sqlalchemy.literal(only_collection_record.name)
+                fields_provided["collection"] = sqlalchemy.literal(only_collection_record.name).cast(
+                    # This cast is necessary to ensure that Postgres knows the
+                    # type of this column if it is used in an aggregate
+                    # function.
+                    sqlalchemy.String
+                )
+
         elif not collections:
             sql_projection.joiner.where(sqlalchemy.literal(False))
             if "collection" in fields:
19 changes: 18 additions & 1 deletion python/lsst/daf/butler/tests/butler_queries.py
@@ -1832,7 +1832,8 @@ def test_dataset_queries(self) -> None:
 
         # Tests for a regression of DM-46340, where invalid SQL would be
         # generated when the list of collections is a single run collection and
-        # there is region-postprocessing logic involved.
+        # there is region-postprocessing logic involved. This was due to
+        # missing type information associated with the "run" dataset field.
         result = butler.query_datasets(
             "dt",
             "run",
@@ -1841,6 +1842,22 @@
         )
         self.assertEqual(result[0].dataId, {"instrument": "Cam1", "visit": 1, "detector": 1})
 
+        # A similar issue to the "run" issue above was occurring with the
+        # 'collection' dataset field.
+        with butler.query() as query:
+            rows = list(
+                query.join_dataset_search("dt", "run")
+                .where("instrument='Cam1' and skymap='SkyMap1' and visit=1 and tract=0")
+                .general(
+                    dimensions=["visit", "detector"],
+                    dataset_fields={"dt": set(["collection"])},
+                    find_first=True,
+                )
+            )
+        self.assertEqual(len(rows), 1)
+        self.assertEqual(rows[0]["visit"], 1)
+        self.assertEqual(rows[0]["dt.collection"], "run")
+
 
 def _get_exposure_ids_from_dimension_records(dimension_records: Iterable[DimensionRecord]) -> list[int]:
     output = []
