From e7530c422772d826fdbedee8b3c14b88533078d0 Mon Sep 17 00:00:00 2001 From: Jim Bosch Date: Fri, 30 Aug 2024 14:59:35 -0400 Subject: [PATCH] Relax constraint on governors in query WHERE expressions. We want to raise when somebody says "visit = 42" as a query constraint without also specifying the instrument that makes that visit ID meaningful. But a multi-instrument "visit.region OVERLAPS {region}" query (for example) should be perfectly fine. --- .../daf/butler/direct_query_driver/_driver.py | 6 +- python/lsst/daf/butler/queries/tree/_base.py | 16 +++++ .../butler/queries/tree/_column_expression.py | 12 ++++ .../butler/queries/tree/_column_reference.py | 21 +++++++ .../daf/butler/queries/tree/_predicate.py | 58 +++++++++++++++++++ 5 files changed, 110 insertions(+), 3 deletions(-) diff --git a/python/lsst/daf/butler/direct_query_driver/_driver.py b/python/lsst/daf/butler/direct_query_driver/_driver.py index 88be6c85b7..4ccd769fad 100644 --- a/python/lsst/daf/butler/direct_query_driver/_driver.py +++ b/python/lsst/daf/butler/direct_query_driver/_driver.py @@ -934,9 +934,9 @@ def _analyze_query_tree(self, tree: qt.QueryTree) -> tuple[QueryJoinsPlan, Query # without constraining their governor dimensions, since that's a # particularly easy mistake to make and it's almost never intentional. # We also allow the registry data ID values to provide governor values. - where_columns = qt.ColumnSet(self.universe.empty) - result.predicate.gather_required_columns(where_columns) - for governor in where_columns.dimensions.governors: + where_governors: set[str] = set() + result.predicate.gather_governors(where_governors) + for governor in where_governors: if governor not in result.constraint_data_id: if governor in self._default_data_id.dimensions: result.constraint_data_id[governor] = self._default_data_id[governor] diff --git a/python/lsst/daf/butler/queries/tree/_base.py b/python/lsst/daf/butler/queries/tree/_base.py index e641dff84b..180940cbcd 100644 --- a/python/lsst/daf/butler/queries/tree/_base.py +++ b/python/lsst/daf/butler/queries/tree/_base.py @@ -132,6 +132,18 @@ def gather_required_columns(self, columns: ColumnSet) -> None: """ raise NotImplementedError() + @abstractmethod + def gather_governors(self, governors: set[str]) -> None: + """Add any governor dimensions that need to be fully identified for + this column expression to be sound. + + Parameters + ---------- + governors : `set` [ `str` ] + Set of governor dimension names to modify in place. + """ + raise NotImplementedError() + @abstractmethod def visit(self, visitor: ColumnExpressionVisitor[_T]) -> _T: """Invoke the visitor interface. @@ -177,6 +189,10 @@ def gather_required_columns(self, columns: ColumnSet) -> None: # Docstring inherited. pass + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + pass + @property def column_type(self) -> ColumnType: # Docstring inherited. diff --git a/python/lsst/daf/butler/queries/tree/_column_expression.py b/python/lsst/daf/butler/queries/tree/_column_expression.py index 902d9df956..61b2642efc 100644 --- a/python/lsst/daf/butler/queries/tree/_column_expression.py +++ b/python/lsst/daf/butler/queries/tree/_column_expression.py @@ -80,6 +80,10 @@ def gather_required_columns(self, columns: ColumnSet) -> None: # Docstring inherited. self.operand.gather_required_columns(columns) + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + self.operand.gather_governors(governors) + @property def column_type(self) -> ColumnType: # Docstring inherited. @@ -146,6 +150,11 @@ def gather_required_columns(self, columns: ColumnSet) -> None: self.a.gather_required_columns(columns) self.b.gather_required_columns(columns) + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + self.a.gather_governors(governors) + self.b.gather_governors(governors) + @property def column_type(self) -> ColumnType: # Docstring inherited. @@ -208,6 +217,9 @@ def gather_required_columns(self, columns: ColumnSet) -> None: # Docstring inherited. self.operand.gather_required_columns(columns) + def gather_governors(self, governors: set[str]) -> None: + self.operand.gather_governors(governors) + @property def column_type(self) -> ColumnType: # Docstring inherited. diff --git a/python/lsst/daf/butler/queries/tree/_column_reference.py b/python/lsst/daf/butler/queries/tree/_column_reference.py index bd8b7373ec..6f67ef66eb 100644 --- a/python/lsst/daf/butler/queries/tree/_column_reference.py +++ b/python/lsst/daf/butler/queries/tree/_column_reference.py @@ -61,6 +61,10 @@ def gather_required_columns(self, columns: ColumnSet) -> None: # Docstring inherited. columns.update_dimensions(self.dimension.minimal_group) + def gather_governors(self, governors: set[str]) -> None: + if self.dimension.governor is not None: + governors.add(self.dimension.governor.name) + @property def column_type(self) -> ColumnType: # Docstring inherited. @@ -95,6 +99,19 @@ def gather_required_columns(self, columns: ColumnSet) -> None: columns.update_dimensions(self.element.minimal_group) columns.dimension_fields[self.element.name].add(self.field) + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + # We assume metadata fields (and certainly region and timespan fields) + # don't need to be qualified with (e.g.) an instrument or skymap name + # to make sense, but keys like visit IDs or detector names do need to + # e qualified. + if ( + isinstance(self.element, Dimension) + and self.field in self.element.alternate_keys.names + and self.element.governor is not None + ): + governors.add(self.element.governor.name) + @property def column_type(self) -> ColumnType: # Docstring inherited. @@ -134,6 +151,10 @@ def gather_required_columns(self, columns: ColumnSet) -> None: # Docstring inherited. columns.dataset_fields[self.dataset_type].add(self.field) + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + pass + @property def column_type(self) -> ColumnType: # Docstring inherited. diff --git a/python/lsst/daf/butler/queries/tree/_predicate.py b/python/lsst/daf/butler/queries/tree/_predicate.py index 15368b491e..476f831ecc 100644 --- a/python/lsst/daf/butler/queries/tree/_predicate.py +++ b/python/lsst/daf/butler/queries/tree/_predicate.py @@ -84,6 +84,18 @@ def gather_required_columns(self, columns: ColumnSet) -> None: """ raise NotImplementedError() + @abstractmethod + def gather_governors(self, governors: set[str]) -> None: + """Add any governor dimensions that need to be fully identified for + this column expression to be sound. + + Parameters + ---------- + governors : `set` [ `str` ] + Set of governor dimension names to modify in place. + """ + raise NotImplementedError() + def invert(self) -> PredicateLeaf: """Return a new leaf that is the logical not of this one.""" return LogicalNot.model_construct(operand=cast("LogicalNotOperand", self)) @@ -293,6 +305,19 @@ def gather_required_columns(self, columns: ColumnSet) -> None: for operand in or_group: operand.gather_required_columns(columns) + def gather_governors(self, governors: set[str]) -> None: + """Add any governor dimensions that need to be fully identified for + this column expression to be sound. + + Parameters + ---------- + governors : `set` [ `str` ] + Set of governor dimension names to modify in place. + """ + for or_group in self.operands: + for operand in or_group: + operand.gather_governors(governors) + def logical_and(self, *args: Predicate) -> Predicate: """Construct a predicate representing the logical AND of this predicate and one or more others. @@ -421,6 +446,10 @@ def gather_required_columns(self, columns: ColumnSet) -> None: # Docstring inherited. self.operand.gather_required_columns(columns) + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + self.operand.gather_governors(governors) + def __str__(self) -> str: return f"NOT {self.operand}" @@ -445,6 +474,10 @@ def gather_required_columns(self, columns: ColumnSet) -> None: # Docstring inherited. self.operand.gather_required_columns(columns) + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + self.operand.gather_governors(governors) + def __str__(self) -> str: return f"{self.operand}" @@ -466,6 +499,10 @@ def gather_required_columns(self, columns: ColumnSet) -> None: # Docstring inherited. self.operand.gather_required_columns(columns) + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + self.operand.gather_governors(governors) + def __str__(self) -> str: return f"{self.operand} IS NULL" @@ -496,6 +533,11 @@ def gather_required_columns(self, columns: ColumnSet) -> None: self.a.gather_required_columns(columns) self.b.gather_required_columns(columns) + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + self.a.gather_governors(governors) + self.b.gather_governors(governors) + def __str__(self) -> str: return f"{self.a} {self.operator.upper()} {self.b}" @@ -556,6 +598,12 @@ def gather_required_columns(self, columns: ColumnSet) -> None: for item in self.container: item.gather_required_columns(columns) + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + self.member.gather_governors(governors) + for item in self.container: + item.gather_governors(governors) + def __str__(self) -> str: return f"{self.member} IN [{', '.join(str(item) for item in self.container)}]" @@ -598,6 +646,10 @@ def gather_required_columns(self, columns: ColumnSet) -> None: # Docstring inherited. self.member.gather_required_columns(columns) + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + self.member.gather_governors(governors) + def __str__(self) -> str: s = f"{self.start if self.start else ''}:{self.stop if self.stop is not None else ''}" if self.step != 1: @@ -644,6 +696,12 @@ def gather_required_columns(self, columns: ColumnSet) -> None: # attached to, not `self.column`, which belongs to `self.query_tree`. self.member.gather_required_columns(columns) + def gather_governors(self, governors: set[str]) -> None: + # Docstring inherited. + # We're only gathering governors from the query_tree this predicate is + # attached to, not `self.column`, which belongs to `self.query_tree`. + self.member.gather_governors(governors) + def __str__(self) -> str: return f"{self.member} IN (query).{self.column}"