Skip to content

Commit

Permalink
Merge pull request #871 from lsst/tickets/DM-40184
Browse files Browse the repository at this point in the history
DM-40184: fix rare bug in QG generation with warps as an overall-input
  • Loading branch information
TallJimbo authored Jul 31, 2023
2 parents a997e9e + 8879207 commit 4927b47
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 1 deletion.
3 changes: 3 additions & 0 deletions doc/changes/DM-40184.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix a rare bug in follow-up dataset queries involving relation commutators.

This occurred when building QuantumGraphs where a "warp" dataset type was an overall input to the pipeline and present in more than one input RUN collection.
23 changes: 22 additions & 1 deletion python/lsst/daf/butler/registry/queries/find_first_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from collections.abc import Sequence, Set
from typing import final

from lsst.daf.relation import ColumnTag, Relation, RowFilter
from lsst.daf.relation import ColumnTag, Relation, RowFilter, UnaryCommutator, UnaryOperationRelation
from lsst.utils.classes import cached_getter

from ...core import DatasetColumnTag, DimensionKeyColumnTag
Expand Down Expand Up @@ -73,3 +73,24 @@ def __str__(self) -> str:
def applied_min_rows(self, target: Relation) -> int:
    # Docstring inherited.
    # A find-first search yields at most one row per group, so the floor
    # is one row when the target is guaranteed non-empty, else zero.
    if target.min_rows:
        return 1
    return 0

def commute(self, current: UnaryOperationRelation) -> UnaryCommutator:
    # Docstring inherited.
    missing_columns = set(self.columns_required - current.target.columns)
    if missing_columns:
        # The inner operation's target lacks columns this find-first
        # needs, so we cannot move inside it; report why and leave the
        # current operation where it is.
        return UnaryCommutator(
            first=None,
            second=current.operation,
            done=False,
            messages=(f"{current.target} is missing columns {missing_columns}",),
        )
    if current.operation.is_count_dependent:
        # Reordering past a count-dependent operation could change the
        # number of rows it sees, so refuse to commute.
        return UnaryCommutator(
            first=None,
            second=current.operation,
            done=False,
            messages=(f"{current.operation} is count-dependent",),
        )
    # Safe to swap: apply this find-first before the current operation.
    return UnaryCommutator(first=self, second=current.operation)
46 changes: 46 additions & 0 deletions python/lsst/daf/butler/registry/tests/_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -3428,6 +3428,52 @@ def pop_transfer(tree: Relation) -> Relation:
sql.Engine,
)

def test_query_find_datasets_drop_postprocessing(self) -> None:
    """Test that ``DataCoordinateQueryResults.findDatasets`` does not trip
    over commutator limitations of the FindFirstDataset relation
    operation.
    """
    # Setup: load visit/tract/patch records, register a dataset type with
    # dimensions {visit, patch}, and insert one dataset with the same data
    # ID into each of two RUN collections.
    registry = self.makeRegistry()
    self.loadData(registry, "base.yaml")
    self.loadData(registry, "spatial.yaml")
    warp_storage_class = StorageClass("Warpy")
    registry.storageClasses.registerStorageClass(warp_storage_class)
    warp_dataset_type = DatasetType(
        "warp", {"visit", "patch"}, storageClass=warp_storage_class, universe=registry.dimensions
    )
    registry.registerDatasetType(warp_dataset_type)
    (data_id,) = registry.queryDataIds(["visit", "patch"]).limit(1)
    registry.registerRun("run1")
    registry.registerRun("run2")
    (ref1,) = registry.insertDatasets(warp_dataset_type, [data_id], run="run1")
    (ref2,) = registry.insertDatasets(warp_dataset_type, [data_id], run="run2")
    # Search just one of the two collections via
    # queryDataIds(...).findDatasets(...).  This should work even though
    # the relation returned by queryDataIds ends with iteration-engine
    # region-filtering: with only a single collection to search we can
    # tell up front that the (default) findFirst=True is irrelevant, and
    # joining in the dataset query commutes past the iteration-engine
    # postprocessing.
    single_collection_query = registry.queryDataIds(
        {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
    )
    self.assertEqual(
        set(single_collection_query.findDatasets(warp_dataset_type.name, collections=["run1"])),
        {ref1},
    )
    # Now search both collections.  This can only work if the
    # FindFirstDataset operation can be commuted past the iteration-engine
    # operations into SQL.
    both_collections_query = registry.queryDataIds(
        {"visit", "patch"}, visit=data_id["visit"], instrument=data_id["instrument"]
    )
    self.assertEqual(
        set(both_collections_query.findDatasets(warp_dataset_type.name, collections=["run2", "run1"])),
        {ref2},
    )

def test_query_empty_collections(self) -> None:
"""Test for registry query methods with empty collections. The methods
should return empty result set (or None when applicable) and provide
Expand Down

0 comments on commit 4927b47

Please sign in to comment.