
Commit

Use new collections API in more scripts
timj committed Aug 15, 2024
1 parent 163f76a commit 70b33aa
Showing 12 changed files with 51 additions and 72 deletions.
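
The recurring pattern in this commit: collection lookups that used to go through butler.registry move to the newer butler.collections API, which bundles type, children, and parents into a single CollectionInfo and uses snake_case query keywords. A minimal before/after sketch, assuming a hypothetical repo path and collection name:

    from lsst.daf.butler import Butler, CollectionType

    butler = Butler.from_config("/path/to/repo", without_datastore=True)  # hypothetical repo

    # Old registry calls removed throughout this commit:
    #   collection_type = butler.registry.getCollectionType("my_chain")
    #   children = butler.registry.getCollectionChain("my_chain")
    #   matches = butler.registry.queryCollections("*", flattenChains=True)

    # New collections API used in their place:
    info = butler.collections.get_info("my_chain")  # hypothetical chained collection
    if info.type is CollectionType.CHAINED:
        children = info.children
    matches = butler.collections.query("*", flatten_chains=True)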
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/script/_pruneDatasets.py
@@ -223,8 +223,8 @@ def pruneDatasets(
     # If purging, verify that the collection to purge is RUN type collection.
     if purge_run:
         butler = Butler.from_config(repo, without_datastore=True)
-        collectionType = butler.registry.getCollectionType(purge_run)
-        if collectionType is not CollectionType.RUN:
+        collection_info = butler.collections.get_info(purge_run)
+        if collection_info.type is not CollectionType.RUN:
             return PruneDatasetsResult(
                 state=PruneDatasetsResult.State.ERR_PRUNE_ON_NOT_RUN, errDict=dict(collection=purge_run)
             )
11 changes: 6 additions & 5 deletions python/lsst/daf/butler/script/certifyCalibrations.py
@@ -70,13 +70,14 @@ def certifyCalibrations(
         collection, instead of just the most recent one.
     """
     butler = Butler.from_config(repo, writeable=True, without_datastore=True)
-    registry = butler.registry
     timespan = Timespan(
         begin=astropy.time.Time(begin_date, scale="tai") if begin_date is not None else None,
         end=astropy.time.Time(end_date, scale="tai") if end_date is not None else None,
     )
-    if not search_all_inputs and registry.getCollectionType(input_collection) is CollectionType.CHAINED:
-        input_collection = next(iter(registry.getCollectionChain(input_collection)))
+    if not search_all_inputs:
+        collection_info = butler.collections.get_info(input_collection)
+        if collection_info.type is CollectionType.CHAINED:
+            input_collection = collection_info.children[0]

     with butler._query() as query:
         results = query.datasets(dataset_type_name, collections=input_collection)
@@ -86,5 +87,5 @@ def certifyCalibrations(
         raise RuntimeError(
             f"No inputs found for dataset {dataset_type_name} in {input_collection}. {explanation}"
         )
-    registry.registerCollection(output_collection, type=CollectionType.CALIBRATION)
-    registry.certify(output_collection, refs, timespan)
+    butler.registry.registerCollection(output_collection, type=CollectionType.CALIBRATION)
+    butler.registry.certify(output_collection, refs, timespan)

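In the hunk above, one get_info call supplies both the collection type and the chain members, replacing two separate registry calls. A sketch of that control flow in isolation (repo path and collection name are hypothetical):

    from lsst.daf.butler import Butler, CollectionType

    butler = Butler.from_config("/path/to/repo", writeable=True, without_datastore=True)  # hypothetical

    input_collection = "calib/chain"  # hypothetical CHAINED collection
    collection_info = butler.collections.get_info(input_collection)
    if collection_info.type is CollectionType.CHAINED:
        # First child of the chain, formerly next(iter(registry.getCollectionChain(...)))
        input_collection = collection_info.children[0]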
11 changes: 5 additions & 6 deletions python/lsst/daf/butler/script/collectionChain.py
@@ -32,7 +32,6 @@
 from .._butler import Butler
 from .._collection_type import CollectionType
 from ..registry import MissingCollectionError
-from ..registry.wildcards import CollectionWildcard


 def collectionChain(
@@ -80,7 +79,7 @@ def collectionChain(
         raise RuntimeError(f"Must provide children when defining a collection chain in mode {mode}.")

     try:
-        butler.registry.getCollectionType(parent)
+        butler.collections.get_info(parent)
     except MissingCollectionError:
         # Create it -- but only if mode can work with empty chain.
         if mode in ("redefine", "extend", "prepend"):
@@ -96,12 +95,11 @@ def collectionChain(
     if flatten:
         if mode not in ("redefine", "prepend", "extend"):
             raise RuntimeError(f"'flatten' flag is not allowed for {mode}")
-        wildcard = CollectionWildcard.from_names(children)
-        children = butler.registry.queryCollections(wildcard, flattenChains=True)
+        children = butler.collections.query(children, flatten_chains=True)

     _modify_collection_chain(butler, mode, parent, children)

-    return tuple(butler.registry.getCollectionChain(parent))
+    return butler.collections.get_info(parent).children


 def _modify_collection_chain(butler: Butler, mode: str, parent: str, children: Iterable[str]) -> None:
@@ -125,7 +123,8 @@ def _find_children_to_pop(butler: Butler, parent: str, children: Iterable[str])
     the given indexes.
     """
     children = list(children)
-    current = butler.registry.getCollectionChain(parent)
+    collection_info = butler.collections.get_info(parent)
+    current = collection_info.children
     n_current = len(current)
     if children:
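
For the flatten path above, explicit child names can be passed straight to collections.query, which expands any nested chains; this replaces the manual CollectionWildcard construction. A sketch with hypothetical collection names:

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo")  # hypothetical repo

    children = ["chain/a", "run/b"]  # hypothetical; chain/a may itself be CHAINED
    flattened = butler.collections.query(children, flatten_chains=True)
    print(flattened)  # nested chains are replaced by their member collections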
16 changes: 8 additions & 8 deletions python/lsst/daf/butler/script/exportCalibs.py
@@ -68,7 +68,7 @@ def find_calibration_datasets(
     RuntimeError
         Raised if the collection to search is not a CALIBRATION collection.
     """
-    if butler.registry.getCollectionType(collection) != CollectionType.CALIBRATION:
+    if butler.collections.get_info(collection).type != CollectionType.CALIBRATION:
         raise RuntimeError(f"Collection {collection} is not a CALIBRATION collection.")

     exportDatasets = []
@@ -122,7 +122,7 @@ def exportCalibs(
     butler = Butler.from_config(repo, writeable=False)

     dataset_type_query = dataset_type or ...
-    collections_query = collections or ...
+    collections_query = collections or "*"

     calibTypes = [
         datasetType
@@ -133,18 +133,18 @@ def exportCalibs(
     collectionsToExport = []
     datasetsToExport = []

-    for collection in butler.registry.queryCollections(
+    for collection in butler.collections.query(
         collections_query,
-        flattenChains=True,
-        includeChains=True,
-        collectionTypes={CollectionType.CALIBRATION, CollectionType.CHAINED},
+        flatten_chains=True,
+        include_chains=True,
+        collection_types={CollectionType.CALIBRATION, CollectionType.CHAINED},
     ):
         log.info("Checking collection: %s", collection)

         # Get collection information.
         collectionsToExport.append(collection)
-        collectionType = butler.registry.getCollectionType(collection)
-        if collectionType == CollectionType.CALIBRATION:
+        info = butler.collections.get_info(collection)
+        if info.type == CollectionType.CALIBRATION:
             exportDatasets = find_calibration_datasets(butler, collection, calibTypes)
             datasetsToExport.extend(exportDatasets)
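
The loop above also shows the keyword renames: flattenChains, includeChains, and collectionTypes become flatten_chains, include_chains, and collection_types. A standalone sketch of the same query shape (repo path hypothetical):

    from lsst.daf.butler import Butler, CollectionType

    butler = Butler.from_config("/path/to/repo", writeable=False)  # hypothetical repo

    for collection in butler.collections.query(
        "*",
        flatten_chains=True,
        include_chains=True,
        collection_types={CollectionType.CALIBRATION, CollectionType.CHAINED},
    ):
        print(collection, butler.collections.get_info(collection).type.name)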
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/script/queryDataIds.py
@@ -168,9 +168,9 @@ def queryDataIds(
     with butler._query() as query:
         if datasets:
             # Need to constrain results based on dataset type and collection.
-            query_collections = collections or ...
+            query_collections = collections or "*"

-            expanded_collections = butler.registry.queryCollections(query_collections)
+            expanded_collections = butler.collections.query(query_collections)

             sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections)
             for dt in dataset_types:
5 changes: 2 additions & 3 deletions python/lsst/daf/butler/script/queryDatasets.py
@@ -29,7 +29,6 @@
 import dataclasses
 from collections import defaultdict
 from collections.abc import Iterable, Iterator
-from types import EllipsisType
 from typing import TYPE_CHECKING

 import numpy as np
@@ -214,14 +213,14 @@ def getDatasets(self) -> Iterator[DatasetRef]:
             Dataset references matching the given query criteria.
         """
         datasetTypes = self._dataset_type_glob or ...
-        query_collections: Iterable[str] | EllipsisType = self._collections_wildcard or ...
+        query_collections: Iterable[str] = self._collections_wildcard or ["*"]

         # Currently need to use old interface to get all the matching
         # dataset types and loop over the dataset types executing a new
         # query each time.
         dataset_types = self.butler.registry.queryDatasetTypes(datasetTypes)
         with self.butler._query() as query:
-            query_collections = self.butler.registry.queryCollections(query_collections)
+            query_collections = self.butler.collections.query(query_collections)
             # Accumulate over dataset types.
             for dt in dataset_types:
                 results = query.datasets(dt, collections=query_collections, find_first=self._find_first)
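
The other recurring change in these scripts: the match-everything sentinel for collections moves from Python's Ellipsis (...) to the string wildcard "*", which collections.query then expands to concrete collection names. A sketch, with the CLI-supplied value hypothetical:

    from lsst.daf.butler import Butler

    butler = Butler.from_config("/path/to/repo")  # hypothetical repo

    cli_collections: list[str] | None = None  # e.g. nothing passed on the command line
    query_collections = cli_collections or ["*"]  # formerly: cli_collections or ...
    expanded = butler.collections.query(query_collections)  # concrete names, wildcards resolved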
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/script/queryDimensionRecords.py
@@ -79,8 +79,8 @@ def queryDimensionRecords(
     with butler._query() as query:

         if datasets:
-            query_collections = collections or ...
-            expanded_collections = butler.registry.queryCollections(query_collections)
+            query_collections = collections or "*"
+            expanded_collections = butler.collections.query(query_collections)
             dataset_types = list(butler.registry.queryDatasetTypes(datasets))

             sub_query = query.join_dataset_search(dataset_types.pop(0), collections=expanded_collections)
22 changes: 5 additions & 17 deletions python/lsst/daf/butler/script/removeCollections.py
@@ -85,30 +85,18 @@ def _getCollectionInfo(
     """
     butler = Butler.from_config(repo, without_datastore=True)
     try:
-        names = sorted(
-            butler.registry.queryCollections(
-                collectionTypes=frozenset(
-                    (
-                        CollectionType.RUN,
-                        CollectionType.TAGGED,
-                        CollectionType.CHAINED,
-                        CollectionType.CALIBRATION,
-                    )
-                ),
-                expression=collection,
-                includeChains=True,
-            )
-        )
+        names = sorted(butler.collections.query(collection, include_chains=True))
     except MissingCollectionError:
         # Hide the error and act like no collections should be removed.
         names = []
     collections = Table(names=("Collection", "Collection Type"), dtype=(str, str))
     runCollections = Table(names=("Collection",), dtype=(str,))
     for name in names:
-        collectionType = butler.registry.getCollectionType(name).name
-        if collectionType == "RUN":
+        collection_info = butler.collections.get_info(name)
+        if collection_info.type == CollectionType.RUN:
             runCollections.add_row((name,))
         else:
-            collections.add_row((name, collectionType))
+            collections.add_row((name, collection_info.type.name))

     return CollectionInfo(collections, runCollections)
16 changes: 5 additions & 11 deletions python/lsst/daf/butler/script/removeRuns.py
@@ -87,23 +87,17 @@ def _getCollectionInfo(
     """
     butler = Butler.from_config(repo)
    try:
-        collectionNames = list(
-            butler.registry.queryCollections(
-                collectionTypes=frozenset((CollectionType.RUN,)),
-                expression=collection,
-                includeChains=False,
-            )
-        )
+        collectionNames = butler.collections.query(collection, CollectionType.RUN, include_chains=False)
     except MissingCollectionError:
         # Act as if no collections matched.
         collectionNames = []

     dataset_types = butler.registry.queryDatasetTypes(...)
     runs = []
     datasets: dict[str, int] = defaultdict(int)
     for collectionName in collectionNames:
-        assert butler.registry.getCollectionType(collectionName).name == "RUN"
-        parents = butler.registry.getCollectionParentChains(collectionName)
-        runs.append(RemoveRun(collectionName, list(parents)))
+        collection_info = butler.collections.get_info(collectionName, include_parents=True)
+        assert collection_info.type == CollectionType.RUN
+        runs.append(RemoveRun(collectionName, list(collection_info.parents)))
     with butler._query() as query:
         for dt in dataset_types:
             results = query.datasets(dt, collections=collectionName)
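
Parent chains now come from the same accessor via include_parents=True, replacing getCollectionParentChains. A sketch assuming a hypothetical RUN collection:

    from lsst.daf.butler import Butler, CollectionType

    butler = Butler.from_config("/path/to/repo")  # hypothetical repo

    info = butler.collections.get_info("my_run", include_parents=True)  # hypothetical run
    assert info.type == CollectionType.RUN
    print(info.parents)  # chained collections containing this run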
5 changes: 2 additions & 3 deletions python/lsst/daf/butler/script/retrieveArtifacts.py
@@ -30,7 +30,6 @@
 __all__ = ("retrieveArtifacts",)

 import logging
-from types import EllipsisType
 from typing import TYPE_CHECKING

 from .._butler import Butler
@@ -85,7 +84,7 @@ def retrieveArtifacts(
         The destination URIs of every transferred artifact.
     """
     query_types = dataset_type or ...
-    query_collections: tuple[str, ...] | EllipsisType = collections or ...
+    query_collections: tuple[str, ...] = collections or tuple("*")

     butler = Butler.from_config(repo, writeable=False)
@@ -94,7 +93,7 @@ def retrieveArtifacts(
     dataset_types = butler.registry.queryDatasetTypes(query_types)
     refs: list[DatasetRef] = []
     with butler._query() as query:
-        expanded_collections = butler.registry.queryCollections(query_collections)
+        expanded_collections = butler.collections.query(query_collections)
         for dt in dataset_types:
             results = query.datasets(dt, collections=expanded_collections, find_first=find_first)
             if where:
5 changes: 2 additions & 3 deletions python/lsst/daf/butler/script/transferDatasets.py
@@ -29,7 +29,6 @@
 __all__ = ("transferDatasets",)

 import logging
-from types import EllipsisType

 from lsst.daf.butler import DatasetRef
@@ -79,12 +78,12 @@ def transferDatasets(
     dest_butler = Butler.from_config(dest, writeable=True)

     dataset_type_expr = dataset_type or ...
-    collections_expr: tuple[str, ...] | EllipsisType = collections or ...
+    collections_expr: tuple[str, ...] = collections or tuple("*")

     dataset_types = source_butler.registry.queryDatasetTypes(dataset_type_expr)
     source_refs: list[DatasetRef] = []
     with source_butler._query() as query:
-        query_collections = source_butler.registry.queryCollections(collections_expr)
+        query_collections = source_butler.collections.query(collections_expr)
         # Loop over dataset types and accumulate.
         for dt in dataset_types:
             results = query.datasets(dt, collections=query_collections, find_first=find_first)
20 changes: 10 additions & 10 deletions tests/test_cliCmdPruneDatasets.py
@@ -35,7 +35,7 @@
 import lsst.daf.butler.registry.sql_registry
 import lsst.daf.butler.script
 from astropy.table import Table
-from lsst.daf.butler import CollectionType
+from lsst.daf.butler import CollectionInfo, CollectionType
 from lsst.daf.butler.cli.butler import cli as butlerCli
 from lsst.daf.butler.cli.cmd.commands import (
     pruneDatasets_askContinueMsg,
@@ -400,9 +400,9 @@ def test_purgeNoOp(self):
         )

     @patch.object(
-        lsst.daf.butler.registry.sql_registry.SqlRegistry,
-        "getCollectionType",
-        side_effect=lambda x: CollectionType.RUN,
+        lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
+        "get_info",
+        side_effect=lambda x: CollectionInfo(name="run", type=CollectionType.RUN),
     )
     def test_purgeImpliedArgs(self, mockGetCollectionType):
         """Verify the arguments implied by --purge.
@@ -432,9 +432,9 @@ def test_purgeImpliedArgs(self, mockGetCollectionType):
         )

     @patch.object(
-        lsst.daf.butler.registry.sql_registry.SqlRegistry,
-        "getCollectionType",
-        side_effect=lambda x: CollectionType.RUN,
+        lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
+        "get_info",
+        side_effect=lambda x: CollectionInfo(name="run", type=CollectionType.RUN),
     )
     def test_purgeImpliedArgsWithCollections(self, mockGetCollectionType):
         """Verify the arguments implied by --purge, with a COLLECTIONS."""
@@ -457,9 +457,9 @@ def test_purgeImpliedArgsWithCollections(self, mockGetCollectionType):
         )

     @patch.object(
-        lsst.daf.butler.registry.sql_registry.SqlRegistry,
-        "getCollectionType",
-        side_effect=lambda x: CollectionType.TAGGED,
+        lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
+        "get_info",
+        side_effect=lambda x: CollectionInfo(name="myTaggedCollection", type=CollectionType.TAGGED),
     )
     def test_purgeOnNonRunCollection(self, mockGetCollectionType):
         """Verify calling run on a non-run collection fails with expected
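
The test updates swap the mock target from SqlRegistry.getCollectionType to DirectButlerCollections.get_info, whose stub must now return a full CollectionInfo. A minimal standalone sketch of that patching style (the function under the decorator is illustrative only):

    from unittest.mock import patch

    import lsst.daf.butler.direct_butler._direct_butler_collections
    from lsst.daf.butler import CollectionInfo, CollectionType

    @patch.object(
        lsst.daf.butler.direct_butler._direct_butler_collections.DirectButlerCollections,
        "get_info",
        side_effect=lambda name: CollectionInfo(name="run", type=CollectionType.RUN),
    )
    def check_purge_path(mock_get_info):
        # Code under test that calls butler.collections.get_info() will now
        # receive the stubbed CollectionInfo instead of touching a registry.
        ...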
