Skip to content

Commit

Permalink
Do not query all the collections again unless there are wildcards
Browse files Browse the repository at this point in the history
It can be slow to run the query on a list of hundreds of collections
just on the off chance they have wildcards.
  • Loading branch information
timj committed Sep 9, 2024
1 parent 709d25a commit 3533f90
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from .dimensions import DataCoordinate, DimensionConfig
from .registry import RegistryConfig, _RegistryFactory
from .repo_relocation import BUTLER_ROOT_TAG
from .utils import has_globs

if TYPE_CHECKING:
from ._dataset_existence import DatasetExistence
Expand Down Expand Up @@ -1688,17 +1689,17 @@ def query_datasets(
data_id = DataCoordinate.make_empty(self.dimensions)
if order_by is None:
order_by = []
if collections:
if collections and has_globs(collections):
# Wild cards need to be expanded but can only be allowed if
# find_first=False because expanding wildcards does not return
# a guaranteed ordering.
expanded_collections = self.collections.query(collections)
if find_first and set(expanded_collections) != set(ensure_iterable(collections)):
# a guaranteed ordering. Querying collection registry to expand
# collections when we do not have wildcards is expensive so only
# do it if we need it.
if find_first:
raise RuntimeError(
"Can not use wildcards in collections when find_first=True "
f" (given {collections} which expanded to {expanded_collections})"
f"Can not use wildcards in collections when find_first=True (given {collections})"
)
collections = expanded_collections
collections = self.collections.query(collections)
query_limit = limit
warn_limit = False
if limit is not None and limit < 0:
Expand Down

0 comments on commit 3533f90

Please sign in to comment.