From dc9b74c30ef7ab2e90ebcb2e49e0f3b18817bf12 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Thu, 5 Sep 2024 11:59:08 -0700 Subject: [PATCH] Fix handling of find_first=True with collection wildcards --- python/lsst/daf/butler/_butler.py | 19 +++++++++++++++---- .../lsst/daf/butler/script/queryDatasets.py | 5 ++++- .../lsst/daf/butler/tests/butler_queries.py | 5 ++++- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index 166e2280ee..e109dbe64b 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -1588,14 +1588,16 @@ def query_datasets( Dataset type object or name to search for. collections : collection expression, optional A collection name or iterable of collection names to search. If not - provided, the default collections are used. Can be a wildcard. See - :ref:`daf_butler_collection_expressions` for more information. + provided, the default collections are used. Can be a wildcard if + ``find_first`` is `False` (if find first is requested the order + of collections matters and wildcards make the order indeterminate). + See :ref:`daf_butler_collection_expressions` for more information. find_first : `bool`, optional If `True` (default), for each result data ID, only yield one `DatasetRef` of each `DatasetType`, from the first collection in which a dataset of that dataset type appears (according to the order of ``collections`` passed in). If `True`, ``collections`` - must not contain regular expressions and may not be ``...``. + must not contain wildcards. data_id : `dict` or `DataCoordinate`, optional A data ID whose key-value pairs are used as equality constraints in the query. 
@@ -1667,7 +1669,16 @@ def query_datasets( if order_by is None: order_by = [] if collections: - collections = self.collections.query(collections) + # Wildcards need to be expanded but can only be allowed if + # find_first=False because expanding wildcards does not return + # a guaranteed ordering. + expanded_collections = self.collections.query(collections) + if find_first and set(expanded_collections) != set(ensure_iterable(collections)): + raise RuntimeError( + "Can not use wildcards in collections when find_first=True " + f" (given {collections} which expanded to {expanded_collections})" + ) + collections = expanded_collections with self.query() as query: result = ( query.where(data_id, where, bind=bind, **kwargs) diff --git a/python/lsst/daf/butler/script/queryDatasets.py b/python/lsst/daf/butler/script/queryDatasets.py index 5d092cda69..7343c56ca1 100644 --- a/python/lsst/daf/butler/script/queryDatasets.py +++ b/python/lsst/daf/butler/script/queryDatasets.py @@ -242,7 +242,10 @@ def getDatasets(self) -> Iterator[DatasetRef]: # Expand the collections query and include summary information. query_collections_info = self.butler.collections.query_info(query_collections, include_summary=True) - query_collections = [c.name for c in query_collections_info] + expanded_query_collections = [c.name for c in query_collections_info] + if self._find_first and set(query_collections) != set(expanded_query_collections): + raise RuntimeError("Can not use wildcards in collections when find_first=True") + query_collections = expanded_query_collections # Only iterate over dataset types that are relevant for the query. 
dataset_types = set( diff --git a/python/lsst/daf/butler/tests/butler_queries.py b/python/lsst/daf/butler/tests/butler_queries.py index f893e64827..f6b11f2470 100644 --- a/python/lsst/daf/butler/tests/butler_queries.py +++ b/python/lsst/daf/butler/tests/butler_queries.py @@ -257,8 +257,11 @@ def test_simple_dataset_query(self) -> None: self.assertEqual(refs_q[0].id, UUID("e15ab039-bc8b-4135-87c5-90902a7c0b22")) self.assertEqual(refs_q[1].id, UUID("51352db4-a47a-447c-b12d-a50b206b17cd")) + with self.assertRaises(RuntimeError) as cm: + butler.query_datasets("bias", "*", detector=100, instrument="Unknown", find_first=True) + self.assertIn("Can not use wildcards", str(cm.exception)) with self.assertRaises(EmptyQueryResultError) as cm: - butler.query_datasets("bias", "*", detector=100, instrument="Unknown") + butler.query_datasets("bias", "*", detector=100, instrument="Unknown", find_first=False) self.assertIn("doomed", str(cm.exception)) def test_general_query(self) -> None: