
Commit

Use new collections API in more scripts
timj committed Aug 15, 2024
1 parent 163f76a commit 70b33aa
Showing 12 changed files with 51 additions and 72 deletions.
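
The recurring pattern in this commit: collection lookups that used to go through butler.registry move to the newer butler.collections API, which bundles type, children, and parents into a single CollectionInfo and uses snake_case query keywords. A minimal before/after sketch, assuming a hypothetical repo path and collection name:

    from lsst.daf.butler import Butler, CollectionType

    butler = Butler.from_config("/path/to/repo", without_datastore=True)  # hypothetical repo

    # Old registry calls removed throughout this commit:
    #   collection_type = butler.registry.getCollectionType("my_chain")
    #   children = butler.registry.getCollectionChain("my_chain")
    #   matches = butler.registry.queryCollections("*", flattenChains=True)

    # New collections API used in their place:
    info = butler.collections.get_info("my_chain")  # hypothetical chained collection
    if info.type is CollectionType.CHAINED:
        children = info.children
    matches = butler.collections.query("*", flatten_chains=True)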
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/script/_pruneDatasets.py
@@ -223,8 +223,8 @@ def pruneDatasets(
     # If purging, verify that the collection to purge is RUN type collection.
     if purge_run:
         butler = Butler.from_config(repo, without_datastore=True)
-        collectionType = butler.registry.getCollectionType(purge_run)
-        if collectionType is not CollectionType.RUN:
+        collection_info = butler.collections.get_info(purge_run)
+        if collection_info.type is not CollectionType.RUN:
             return PruneDatasetsResult(
                 state=PruneDatasetsResult.State.ERR_PRUNE_ON_NOT_RUN, errDict=dict(collection=purge_run)
             )
11 changes: 6 additions & 5 deletions python/lsst/daf/butler/script/certifyCalibrations.py
@@ -70,13 +70,14 @@ def certifyCalibrations(
         collection, instead of just the most recent one.
     """
     butler = Butler.from_config(repo, writeable=True, without_datastore=True)
-    registry = butler.registry
     timespan = Timespan(
         begin=astropy.time.Time(begin_date, scale="tai") if begin_date is not None else None,
         end=astropy.time.Time(end_date, scale="tai") if end_date is not None else None,
     )
-    if not search_all_inputs and registry.getCollectionType(input_collection) is CollectionType.CHAINED:
-        input_collection = next(iter(registry.getCollectionChain(input_collection)))
+    if not search_all_inputs:
+        collection_info = butler.collections.get_info(input_collection)
+        if collection_info.type is CollectionType.CHAINED:
+            input_collection = collection_info.children[0]

     with butler._query() as query:
         results = query.datasets(dataset_type_name, collections=input_collection)
@@ -86,5 +87,5 @@ def certifyCalibrations(
         raise RuntimeError(
             f"No inputs found for dataset {dataset_type_name} in {input_collection}. {explanation}"
         )
-    registry.registerCollection(output_collection, type=CollectionType.CALIBRATION)
-    registry.certify(output_collection, refs, timespan)
+    butler.registry.registerCollection(output_collection, type=CollectionType.CALIBRATION)
+    butler.registry.certify(output_collection, refs, timespan)

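In the hunk above, one get_info call supplies both the collection type and the chain members, replacing two separate registry calls. A sketch of that control flow in isolation (repo path and collection name are hypothetical):

    from lsst.daf.butler import Butler, CollectionType

    butler = Butler.from_config("/path/to/repo", writeable=True, without_datastore=True)  # hypothetical

    input_collection = "calib/chain"  # hypothetical CHAINED collection
    collection_info = butler.collections.get_info(input_collection)
    if collection_info.type is CollectionType.CHAINED:
        # First child of the chain, formerly next(iter(registry.getCollectionChain(...)))
        input_collection = collection_info.children[0]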
11 changes: 5 additions & 6 deletions python/lsst/daf/butler/script/collectionChain.py
@@ -32,7 +32,6 @@
 from .._butler import Butler
 from .._collection_type import CollectionType
 from ..registry import MissingCollectionError
-from ..registry.wildcards import CollectionWildcard


 def collectionChain(
@@ -80,7 +79,7 @@ def collectionChain(
         raise RuntimeError(f"Must provide children when defining a collection chain in mode {mode}.")

     try:
-        butler.registry.getCollectionType(parent)
+        butler.collections.get_info(parent)
     except MissingCollectionError:
         # Create it -- but only if mode can work with empty chain.
         if mode in ("redefine", "extend", "prepend"):
@@ -96,12 +95,11 @@ def collectionChain(
     if flatten:
         if mode not in ("redefine", "prepend", "extend"):
             raise RuntimeError(f"'flatten' flag is not allowed for {mode}")
-        wildcard = CollectionWildcard.from_names(children)
-        children = butler.registry.queryCollections(wildcard, flattenChains=True)
+        children = butler.collections.query(children, flatten_chains=True)

     _modify_collection_chain(butler, mode, parent, children)

-    return tuple(butler.registry.getCollectionChain(parent))
+    return butler.collections.get_info(parent).children


 def _modify_collection_chain(butler: Butler, mode: str, parent: str, children: Iterable[str]) -> None:
@@ -125,7 +123,8 @@ def _find_children_to_pop(butler: Butler, parent: str, children: Iterable[str])
     the given indexes.
     """
     children = list(children)
-    current = butler.registry.getCollectionChain(parent)
+    collection_info = butler.collections.get_info(parent)
+    current = collection_info.children
     n_current = len(current)
     if children:
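
For the flatten path above, explicit child names can be passed straight to collections.query, which expands any nested chains; this replaces the manual CollectionWildcard construction. A sketch with hypothetical collection names:

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo")  # hypothetical repo

    children = ["chain/a", "run/b"]  # hypothetical; chain/a may itself be CHAINED
    flattened = butler.collections.query(children, flatten_chains=True)
    print(flattened)  # nested chains are replaced by their member collections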
16 changes: 8 additions & 8 deletions python/lsst/daf/butler/script/exportCalibs.py
@@ -68,7 +68,7 @@ def find_calibration_datasets(
     RuntimeError
         Raised if the collection to search is not a CALIBRATION collection.
     """
-    if butler.registry.getCollectionType(collection) != CollectionType.CALIBRATION:
+    if butler.collections.get_info(collection).type != CollectionType.CALIBRATION:
         raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.")

     exportDatasets = []
@@ -122,7 +122,7 @@ def exportCalibs(
     butler = Butler.from_config(repo, writeable=False)

     dataset_type_query = dataset_type or ...
-    collections_query = collections or ...
+    collections_query = collections or "*"

     calibTypes = [
         datasetType
@@ -133,18 +133,18 @@ def exportCalibs(
     collectionsToExport = []
     datasetsToExport = []

-    for collection in butler.registry.queryCollections(
+    for collection in butler.collections.query(
         collections_query,
-        flattenChains=True,
-        includeChains=True,
-        collectionTypes={CollectionType.CALIBRATION, CollectionType.CHAINED},
+        flatten_chains=True,
+        include_chains=True,
+        collection_types={CollectionType.CALIBRATION, CollectionType.CHAINED},
     ):
         log.info("Checking collection: %s", collection)

         # Get collection information.
         collectionsToExport.append(collection)
-        collectionType = butler.registry.getCollectionType(collection)
-        if collectionType == CollectionType.CALIBRATION:
+        info = butler.collections.get_info(collection)
+        if info.type == CollectionType.CALIBRATION:
             exportDatasets = find_calibration_datasets(butler, collection, calibTypes)
             datasetsToExport.extend(exportDatasets)
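
The loop above also shows the keyword renames: flattenChains, includeChains, and collectionTypes become flatten_chains, include_chains, and collection_types. A standalone sketch of the same query shape (repo path hypothetical):

    from lsst.daf.butler import Butler, CollectionType

    butler = Butler.from_config("/path/to/repo", writeable=False)  # hypothetical repo

    for collection in butler.collections.query(
        "*",
        flatten_chains=True,
        include_chains=True,
        collection_types={CollectionType.CALIBRATION, CollectionType.CHAINED},
    ):
        print(collection, butler.collections.get_info(collection).type.name)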
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/script/queryDataIds.py
@@ -168,9 +168,9 @@ def queryDataIds(
     with butler._query() as query:
         if datasets:
             # Need to constrain results based on dataset type and collection.
-            query_collections = collections or ...
+            query_collections = collections or "*"

-            expanded_collections = butler.registry.queryCollections(query_collections)
+            expanded_collections = butler.collections.query(query_collections)

             sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections)
             for dt in dataset_types:
5 changes: 2 additions & 3 deletions python/lsst/daf/butler/script/queryDatasets.py
@@ -29,7 +29,6 @@
 import dataclasses
 from collections import defaultdict
 from collections.abc import Iterable, Iterator
-from types import EllipsisType
 from typing import TYPE_CHECKING

 import numpy as np
@@ -214,14 +213,14 @@ def getDatasets(self) -> Iterator[DatasetRef]:
             Dataset references matching the given query criteria.
         """
         datasetTypes = self._dataset_type_glob or ...
-        query_collections: Iterable[str] | EllipsisType = self._collections_wildcard or ...
+        query_collections: Iterable[str] = self._collections_wildcard or ["*"]

         # Currently need to use old interface to get all the matching
         # dataset types and loop over the dataset types executing a new
         # query each time.
         dataset_types = self.butler.registry.queryDatasetTypes(datasetTypes)
         with self.butler._query() as query:
-            query_collections = self.butler.registry.queryCollections(query_collections)
+            query_collections = self.butler.collections.query(query_collections)
             # Accumulate over dataset types.
             for dt in dataset_types:
                 results = query.datasets(dt, collections=query_collections, find_first=self._find_first)
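
The other recurring change in these scripts: the match-everything sentinel for collections moves from Python's Ellipsis (...) to the string wildcard "*", which collections.query then expands to concrete collection names. A sketch, with the CLI-supplied value hypothetical:

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo")  # hypothetical repo

    cli_collections: list[str] | None = None  # e.g. nothing passed on the command line
    query_collections = cli_collections or ["*"]  # formerly: cli_collections or ...
    expanded = butler.collections.query(query_collections)  # concrete names, wildcards resolved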
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/script/queryDimensionRecords.py
@@ -79,8 +79,8 @@ def queryDimensionRecords(
     with butler._query() as query:

         if datasets:
-            query_collections = collections or ...
-            expanded_collections = butler.registry.queryCollections(query_collections)
+            query_collections = collections or "*"
+            expanded_collections = butler.collections.query(query_collections)
             dataset_types = list(butler.registry.queryDatasetTypes(datasets))

             sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections)
22 changes: 5 additions & 17 deletions python/lsst/daf/butler/script/removeCollections.py
@@ -85,30 +85,18 @@ def _getCollectionInfo(
     """
     butler = Butler.from_config(repo, without_datastore=True)
     try:
-        names = sorted(
-            butler.registry.queryCollections(
-                collectionTypes=frozenset(
-                    (
-                        CollectionType.RUN,
-                        CollectionType.TAGGED,
-                        CollectionType.CHAINED,
-                        CollectionType.CALIBRATION,
-                    )
-                ),
-                expression=collection,
-                includeChains=True,
-            )
-        )
+        names = sorted(butler.collections.query(collection, include_chains=True))
     except MissingCollectionError:
         # Hide the error and act like no collections should be removed.
         names = []
     collections = Table(names=("Collection", "Collection Type"), dtype=(str, str))
     runCollections = Table(names=("Collection",), dtype=(str,))
     for name in names:
-        collectionType = butler.registry.getCollectionType(name).name
-        if collectionType == "RUN":
+        collection_info = butler.collections.get_info(name)
+        if collection_info.type == CollectionType.RUN:
             runCollections.add_row((name,))
         else:
-            collections.add_row((name, collectionType))
+            collections.add_row((name, collection_info.type.name))

     return CollectionInfo(collections, runCollections)
16 changes: 5 additions & 11 deletions python/lsst/daf/butler/script/removeRuns.py
@@ -87,23 +87,17 @@ def _getCollectionInfo(
     """
     butler = Butler.from_config(repo)
    try:
-        collectionNames = list(
-            butler.registry.queryCollections(
-                collectionTypes=frozenset((CollectionType.RUN,)),
-                expression=collection,
-                includeChains=False,
-            )
-        )
+        collectionNames = butler.collections.query(collection, CollectionType.RUN, include_chains=False)
     except MissingCollectionError:
         # Act as if no collections matched.
         collectionNames = []

     dataset_types = butler.registry.queryDatasetTypes(...)
     runs = []
     datasets: dict[str, int] = defaultdict(int)
     for collectionName in collectionNames:
-        assert butler.registry.getCollectionType(collectionName).name == "RUN"
-        parents = butler.registry.getCollectionParentChains(collectionName)
-        runs.append(RemoveRun(collectionName, list(parents)))
+        collection_info = butler.collections.get_info(collectionName, include_parents=True)
+        assert collection_info.type == CollectionType.RUN
+        runs.append(RemoveRun(collectionName, list(collection_info.parents)))
     with butler._query() as query:
         for dt in dataset_types:
             results = query.datasets(dt, collections=collectionName)
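
Parent chains now come from the same accessor via include_parents=True, replacing getCollectionParentChains. A sketch assuming a hypothetical RUN collection:

    from lsst.daf.butler import Butler, CollectionType

    butler = Butler.from_config("/path/to/repo")  # hypothetical repo

    info = butler.collections.get_info("my_run", include_parents=True)  # hypothetical run
    assert info.type == CollectionType.RUN
    print(info.parents)  # chained collections containing this run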
5 changes: 2 additions & 3 deletions python/lsst/daf/butler/script/retrieveArtifacts.py
@@ -30,7 +30,6 @@
 __all__ = ("retrieveArtifacts",)

 import logging
-from types import EllipsisType
 from typing import TYPE_CHECKING

 from .._butler import Butler
@@ -85,7 +84,7 @@ def retrieveArtifacts(
         The destination URIs of every transferred artifact.
     """
     query_types = dataset_type or ...
-    query_collections: tuple[str, ...] | EllipsisType = collections or ...
+    query_collections: tuple[str, ...] = collections or tuple("*")

     butler = Butler.from_config(repo, writeable=False)
@@ -94,7 +93,7 @@ def retrieveArtifacts(
     dataset_types = butler.registry.queryDatasetTypes(query_types)
     refs: list[DatasetRef] = []
     with butler._query() as query:
-        expanded_collections = butler.registry.queryCollections(query_collections)
+        expanded_collections = butler.collections.query(query_collections)
         for dt in dataset_types:
             results = query.datasets(dt, collections=expanded_collections, find_first=find_first)
             if where:
5 changes: 2 additions & 3 deletions python/lsst/daf/butler/script/transferDatasets.py
@@ -29,7 +29,6 @@
 __all__ = ("transferDatasets",)

 import logging
-from types import EllipsisType

 from lsst.daf.butler import DatasetRef
@@ -79,12 +78,12 @@ def transferDatasets(
     dest_butler = Butler.from_config(dest, writeable=True)

     dataset_type_expr = dataset_type or ...
-    collections_expr: tuple[str, ...] | EllipsisType = collections or ...
+    collections_expr: tuple[str, ...] = collections or tuple("*")

     dataset_types = source_butler.registry.queryDatasetTypes(dataset_type_expr)
     source_refs: list[DatasetRef] = []
     with source_butler._query() as query:
-        query_collections = source_butler.registry.queryCollections(collections_expr)
+        query_collections = source_butler.collections.query(collections_expr)
         # Loop over dataset types and accumulate.
         for dt in dataset_types:
             results = query.datasets(dt, collections=query_collections, find_first=find_first)
20 changes: 10 additions & 10 deletions tests/test_cliCmdPruneDatasets.py
@@ -35,7 +35,7 @@
 import lsst.daf.butler.registry.sql_registry
 import lsst.daf.butler.script
 from astropy.table import Table
-from lsst.daf.butler import CollectionType
+from lsst.daf.butler import CollectionInfo, CollectionType
 from lsst.daf.butler.cli.butler import cli as butlerCli
 from lsst.daf.butler.cli.cmd.commands import (
     pruneDatasets_askContinueMsg,
@@ -400,9 +400,9 @@ def test_purgeNoOp(self):
         )

     @patch.object(
-        lsst.daf.butler.registry.sql_registry.SqlRegistry,
-        "getCollectionType",
-        side_effect=lambda x: CollectionType.RUN,
+        lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
+        "get_info",
+        side_effect=lambda x: CollectionInfo(name="run", type=CollectionType.RUN),
     )
     def test_purgeImpliedArgs(self, mockGetCollectionType):
         """Verify the arguments implied by --purge.
@@ -432,9 +432,9 @@ def test_purgeImpliedArgs(self, mockGetCollectionType):
         )

     @patch.object(
-        lsst.daf.butler.registry.sql_registry.SqlRegistry,
-        "getCollectionType",
-        side_effect=lambda x: CollectionType.RUN,
+        lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
+        "get_info",
+        side_effect=lambda x: CollectionInfo(name="run", type=CollectionType.RUN),
     )
     def test_purgeImpliedArgsWithCollections(self, mockGetCollectionType):
         """Verify the arguments implied by --purge, with a COLLECTIONS."""
@@ -457,9 +457,9 @@ def test_purgeImpliedArgsWithCollections(self, mockGetCollectionType):
         )

     @patch.object(
-        lsst.daf.butler.registry.sql_registry.SqlRegistry,
-        "getCollectionType",
-        side_effect=lambda x: CollectionType.TAGGED,
+        lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
+        "get_info",
+        side_effect=lambda x: CollectionInfo(name="myTaggedCollection", type=CollectionType.TAGGED),
     )
     def test_purgeOnNonRunCollection(self, mockGetCollectionType):
         """Verify calling run on a non-run collection fails with expected
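
The test updates swap the mock target from SqlRegistry.getCollectionType to DirectButlerCollections.get_info, whose stub must now return a full CollectionInfo. A minimal standalone sketch of that patching style (the function under the decorator is illustrative only):

    from unittest.mock import patch

    import lsst.daf.butler.direct_butler._direct_butler_collections
    from lsst.daf.butler import CollectionInfo, CollectionType

    @patch.object(
        lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
        "get_info",
        side_effect=lambda name: CollectionInfo(name="run", type=CollectionType.RUN),
    )
    def check_purge_path(mock_get_info):
        # Code under test that calls butler.collections.get_info() will now
        # receive the stubbed CollectionInfo instead of touching a registry.
        ...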
