From 3533f90348bd08cbc4e193de32d3817b89904c8a Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 6 Sep 2024 17:20:27 -0700 Subject: [PATCH] Do not query all the collections again unless there are wildcards It can be slow to run the query on a list of hundreds of collections just on the off chance they have wildcards. --- python/lsst/daf/butler/_butler.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index 2a9d8d0ac4..1630057df9 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -50,6 +50,7 @@ from .dimensions import DataCoordinate, DimensionConfig from .registry import RegistryConfig, _RegistryFactory from .repo_relocation import BUTLER_ROOT_TAG +from .utils import has_globs if TYPE_CHECKING: from ._dataset_existence import DatasetExistence @@ -1688,17 +1689,17 @@ def query_datasets( data_id = DataCoordinate.make_empty(self.dimensions) if order_by is None: order_by = [] - if collections: + if collections and has_globs(collections): # Wild cards need to be expanded but can only be allowed if # find_first=False because expanding wildcards does not return - # a guaranteed ordering. - expanded_collections = self.collections.query(collections) - if find_first and set(expanded_collections) != set(ensure_iterable(collections)): + # a guaranteed ordering. Querying collection registry to expand + # collections when we do not have wildcards is expensive so only + # do it if we need it. + if find_first: raise RuntimeError( - "Can not use wildcards in collections when find_first=True " - f" (given {collections} which expanded to {expanded_collections})" + f"Can not use wildcards in collections when find_first=True (given {collections})" ) - collections = expanded_collections + collections = self.collections.query(collections) query_limit = limit warn_limit = False if limit is not None and limit < 0: