Skip to content

Commit

Permalink
Fix handling of find_first=True with collection wildcards
Browse files Browse the repository at this point in the history
  • Loading branch information
timj committed Sep 5, 2024
1 parent bbf27d7 commit dc9b74c
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 6 deletions.
19 changes: 15 additions & 4 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1588,14 +1588,16 @@ def query_datasets(
Dataset type object or name to search for.
collections : collection expression, optional
A collection name or iterable of collection names to search. If not
provided, the default collections are used. Can be a wildcard. See
:ref:`daf_butler_collection_expressions` for more information.
provided, the default collections are used. Can be a wildcard only if
``find_first`` is `False` (if ``find_first`` is `True` the order of
collections matters, and wildcards make that order indeterminate).
See :ref:`daf_butler_collection_expressions` for more information.
find_first : `bool`, optional
If `True` (default), for each result data ID, only yield one
`DatasetRef` of each `DatasetType`, from the first collection in
which a dataset of that dataset type appears (according to the
order of ``collections`` passed in). If `True`, ``collections``
must not contain regular expressions and may not be ``...``.
must not contain wildcards.
data_id : `dict` or `DataCoordinate`, optional
A data ID whose key-value pairs are used as equality constraints in
the query.
Expand Down Expand Up @@ -1667,7 +1669,16 @@ def query_datasets(
if order_by is None:
order_by = []
if collections:
collections = self.collections.query(collections)
# Wildcards need to be expanded, but they can only be allowed when
# find_first=False because expanding wildcards does not return
# a guaranteed ordering.
expanded_collections = self.collections.query(collections)
if find_first and set(expanded_collections) != set(ensure_iterable(collections)):
raise RuntimeError(
"Can not use wildcards in collections when find_first=True "
f" (given {collections} which expanded to {expanded_collections})"
)
collections = expanded_collections
with self.query() as query:
result = (
query.where(data_id, where, bind=bind, **kwargs)
Expand Down
5 changes: 4 additions & 1 deletion python/lsst/daf/butler/script/queryDatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,10 @@ def getDatasets(self) -> Iterator[DatasetRef]:

# Expand the collections query and include summary information.
query_collections_info = self.butler.collections.query_info(query_collections, include_summary=True)
query_collections = [c.name for c in query_collections_info]
expanded_query_collections = [c.name for c in query_collections_info]
if self._find_first and set(query_collections) != set(expanded_query_collections):
raise RuntimeError("Can not use wildcards in collections when find_first=True")

Check warning on line 247 in python/lsst/daf/butler/script/queryDatasets.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/script/queryDatasets.py#L247

Added line #L247 was not covered by tests
query_collections = expanded_query_collections

# Only iterate over dataset types that are relevant for the query.
dataset_types = set(
Expand Down
5 changes: 4 additions & 1 deletion python/lsst/daf/butler/tests/butler_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,11 @@ def test_simple_dataset_query(self) -> None:
self.assertEqual(refs_q[0].id, UUID("e15ab039-bc8b-4135-87c5-90902a7c0b22"))
self.assertEqual(refs_q[1].id, UUID("51352db4-a47a-447c-b12d-a50b206b17cd"))

with self.assertRaises(RuntimeError) as cm:
butler.query_datasets("bias", "*", detector=100, instrument="Unknown", find_first=True)
self.assertIn("Can not use wildcards", str(cm.exception))
with self.assertRaises(EmptyQueryResultError) as cm:
butler.query_datasets("bias", "*", detector=100, instrument="Unknown")
butler.query_datasets("bias", "*", detector=100, instrument="Unknown", find_first=False)
self.assertIn("doomed", str(cm.exception))

def test_general_query(self) -> None:
Expand Down

0 comments on commit dc9b74c

Please sign in to comment.