From 5f64de89f420017beab896f837823b28d7518d79 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 9 Sep 2024 09:34:44 -0700 Subject: [PATCH] Add the negative --limit facility to dimension records and data IDs command line This was missed before because we added negative-limit support at the last minute. We can't actually reuse that negative-limit handling because the command line uses the advanced query interface to do the dataset type joining. --- python/lsst/daf/butler/cli/opt/options.py | 9 ++++++-- python/lsst/daf/butler/script/queryDataIds.py | 21 +++++++++++++++++-- .../butler/script/queryDimensionRecords.py | 16 ++++++++++++-- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/python/lsst/daf/butler/cli/opt/options.py b/python/lsst/daf/butler/cli/opt/options.py index bddb437d10..3056fb7277 100644 --- a/python/lsst/daf/butler/cli/opt/options.py +++ b/python/lsst/daf/butler/cli/opt/options.py @@ -288,11 +288,16 @@ def _config_split(*args: Any) -> dict[str | None, str]: ) +_default_limit = -20_000 limit_option = MWOptionDecorator( "--limit", - help=unwrap("Limit the number of records, by default all records are shown."), + help=unwrap( + f"""Limit the number of results that are processed. 0 means no limit. A negative + value specifies a cap where a warning will be issued if the cap is hit. 
+ Default value is {_default_limit}.""" + ), type=int, - default=0, + default=_default_limit, ) offset_option = MWOptionDecorator( diff --git a/python/lsst/daf/butler/script/queryDataIds.py b/python/lsst/daf/butler/script/queryDataIds.py index bab1a88749..566093e438 100644 --- a/python/lsst/daf/butler/script/queryDataIds.py +++ b/python/lsst/daf/butler/script/queryDataIds.py @@ -57,6 +57,14 @@ def __init__(self, dataIds: Iterable[DataCoordinate]): # use dict to store dataIds as keys to preserve ordering self.dataIds = dict.fromkeys(dataIds) + def __len__(self) -> int: + return len(self.dataIds) + + def pop_last(self) -> None: + if self.dataIds: + final_key = list(self.dataIds.keys())[-1] + self.dataIds.pop(final_key) + def getAstropyTable(self, order: bool) -> AstropyTable: """Get the table as an astropy table. @@ -195,12 +203,21 @@ def queryDataIds( results = results.where(where) if order_by: results = results.order_by(*order_by) - if limit > 0: - results = results.limit(limit) + query_limit = abs(limit) + warn_limit = False + if limit != 0: + if limit < 0: + query_limit += 1 + warn_limit = True + + results = results.limit(query_limit) if results.any(exact=False): if results.dimensions: table = _Table(results) + if warn_limit and len(table) == query_limit: + table.pop_last() + _LOG.warning("More data IDs are available than the request limit of %d", abs(limit)) if not table.dataIds: return None, "Post-query region filtering removed all rows, since nothing overlapped." 
return table.getAstropyTable(not order_by), None diff --git a/python/lsst/daf/butler/script/queryDimensionRecords.py b/python/lsst/daf/butler/script/queryDimensionRecords.py index 7e97bb9dc3..72d9a9bea9 100644 --- a/python/lsst/daf/butler/script/queryDimensionRecords.py +++ b/python/lsst/daf/butler/script/queryDimensionRecords.py @@ -27,6 +27,7 @@ from __future__ import annotations +import logging from operator import attrgetter from typing import Any @@ -36,6 +37,8 @@ from .._butler import Butler from .._timespan import Timespan +_LOG = logging.getLogger(__name__) + def queryDimensionRecords( repo: str, @@ -100,10 +103,19 @@ def queryDimensionRecords( query_results = query_results.where(where) if order_by: query_results = query_results.order_by(*order_by) - if limit > 0: - query_results = query_results.limit(limit) + query_limit = abs(limit) + warn_limit = False + if limit != 0: + if limit < 0: + query_limit += 1 + warn_limit = True + + query_results = query_results.limit(query_limit) records = list(query_results) + if warn_limit and len(records) == query_limit: + records.pop(-1) + _LOG.warning("More data IDs are available than the request limit of %d", abs(limit)) if not records: return None