From d8ed3d8d4bd7e69f752be0915380f6681310a4d3 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Fri, 12 Jul 2024 17:21:00 -0700 Subject: [PATCH] Add ability to disable file validation tracking on butler import --- python/lsst/daf/butler/_butler.py | 8 ++++++++ python/lsst/daf/butler/cli/cmd/commands.py | 1 + python/lsst/daf/butler/direct_butler/_direct_butler.py | 2 ++ python/lsst/daf/butler/remote_butler/_remote_butler.py | 1 + python/lsst/daf/butler/script/butlerImport.py | 5 +++++ python/lsst/daf/butler/tests/hybrid_butler.py | 2 ++ python/lsst/daf/butler/transfers/_interfaces.py | 9 +++++++++ python/lsst/daf/butler/transfers/_yaml.py | 3 ++- tests/test_cliCmdImport.py | 9 ++++++++- 9 files changed, 38 insertions(+), 2 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index 51b30e57c8..2c16478aa9 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -1260,6 +1260,7 @@ def import_( format: str | None = None, transfer: str | None = None, skip_dimensions: set | None = None, + record_validation_info: bool = True, ) -> None: """Import datasets into this repository that were exported from a different butler repository via `~lsst.daf.butler.Butler.export`. @@ -1285,6 +1286,13 @@ def import_( Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`. skip_dimensions : `set`, optional Names of dimensions that should be skipped and not imported. + record_validation_info : `bool`, optional + If `True`, the default, the datastore can record validation + information associated with the file. If `False` the datastore + will not attempt to track any information such as checksums + or file sizes. This can be useful if such information is tracked + in an external system or if the file is to be compressed in place. + It is up to the datastore whether this parameter is relevant. Raises ------ diff --git a/python/lsst/daf/butler/cli/cmd/commands.py b/python/lsst/daf/butler/cli/cmd/commands.py index 1339588d3b..66f64b197c 100644 --- a/python/lsst/daf/butler/cli/cmd/commands.py +++ b/python/lsst/daf/butler/cli/cmd/commands.py @@ -114,6 +114,7 @@ def associate(**kwargs: Any) -> None: metavar=typeStrAcceptsMultiple, help="Dimensions that should be skipped during import", ) +@track_file_attrs_option() @options_file_option() def butler_import(*args: Any, **kwargs: Any) -> None: """Import data into a butler repository.""" diff --git a/python/lsst/daf/butler/direct_butler/_direct_butler.py b/python/lsst/daf/butler/direct_butler/_direct_butler.py index 843cf533cb..a71865aed4 100644 --- a/python/lsst/daf/butler/direct_butler/_direct_butler.py +++ b/python/lsst/daf/butler/direct_butler/_direct_butler.py @@ -1626,6 +1626,7 @@ def import_( format: str | None = None, transfer: str | None = None, skip_dimensions: set | None = None, + record_validation_info: bool = True, ) -> None: # Docstring inherited. if not self.isWriteable(): @@ -1679,6 +1680,7 @@ def doImport(importStream: TextIO | ResourceHandleProtocol) -> None: directory=directory, transfer=transfer, skip_dimensions=skip_dimensions, + record_validation_info=record_validation_info, ) if isinstance(filename, ResourcePath): diff --git a/python/lsst/daf/butler/remote_butler/_remote_butler.py b/python/lsst/daf/butler/remote_butler/_remote_butler.py index 612a6a4190..3392fc8791 100644 --- a/python/lsst/daf/butler/remote_butler/_remote_butler.py +++ b/python/lsst/daf/butler/remote_butler/_remote_butler.py @@ -477,6 +477,7 @@ def import_( format: str | None = None, transfer: str | None = None, skip_dimensions: set | None = None, + record_validation_info: bool = True, ) -> None: # Docstring inherited. raise NotImplementedError() diff --git a/python/lsst/daf/butler/script/butlerImport.py b/python/lsst/daf/butler/script/butlerImport.py index a4af72b85a..97ee2aa729 100644 --- a/python/lsst/daf/butler/script/butlerImport.py +++ b/python/lsst/daf/butler/script/butlerImport.py @@ -38,6 +38,7 @@ def butlerImport( export_file: str | TextIO | None, transfer: str | None, skip_dimensions: Iterable[str] | None, + track_file_attrs: bool = True, ) -> None: """Import data into a butler repository. @@ -58,6 +59,10 @@ def butlerImport( The external data transfer type. skip_dimensions : `list`, or `None` Dimensions that should be skipped. + track_file_attrs : `bool`, optional + Control whether file attributes such as the size or checksum should + be tracked by the datastore. Whether this parameter is honored + depends on the specific datastore implementation. """ butler = Butler.from_config(repo, writeable=True) diff --git a/python/lsst/daf/butler/tests/hybrid_butler.py b/python/lsst/daf/butler/tests/hybrid_butler.py index b4e00bce2c..c64d2a4ad3 100644 --- a/python/lsst/daf/butler/tests/hybrid_butler.py +++ b/python/lsst/daf/butler/tests/hybrid_butler.py @@ -277,6 +277,7 @@ def import_( format: str | None = None, transfer: str | None = None, skip_dimensions: set | None = None, + record_validation_info: bool = True, ) -> None: self._direct_butler.import_( directory=directory, @@ -284,6 +285,7 @@ def import_( format=format, transfer=transfer, skip_dimensions=skip_dimensions, + record_validation_info=record_validation_info, ) def transfer_dimension_records_from( diff --git a/python/lsst/daf/butler/transfers/_interfaces.py b/python/lsst/daf/butler/transfers/_interfaces.py index c08b7fa1af..4e825da754 100644 --- a/python/lsst/daf/butler/transfers/_interfaces.py +++ b/python/lsst/daf/butler/transfers/_interfaces.py @@ -160,6 +160,7 @@ def load( directory: ResourcePathExpression | None = None, transfer: str | None = None, skip_dimensions: set | None = None, + record_validation_info: bool = True, ) -> None: """Import information associated with the backend into the given registry and datastore. @@ -180,5 +181,13 @@ def load( Dimensions that should be skipped and not imported. This can be useful when importing into a registry that already knows about a specific instrument. + record_validation_info : `bool`, optional + If `True`, the default, the datastore can record validation + information associated with the file. If `False` the datastore + will not attempt to track any information such as checksums + or file sizes. This can be useful if such information is tracked + in an external system or if the file is to be compressed in place. + It is up to the underlying datastore whether this parameter is + relevant. """ raise NotImplementedError() diff --git a/python/lsst/daf/butler/transfers/_yaml.py b/python/lsst/daf/butler/transfers/_yaml.py index 33567949de..64461f279e 100644 --- a/python/lsst/daf/butler/transfers/_yaml.py +++ b/python/lsst/daf/butler/transfers/_yaml.py @@ -649,6 +649,7 @@ def load( directory: ResourcePathExpression | None = None, transfer: str | None = None, skip_dimensions: set | None = None, + record_validation_info: bool = True, ) -> None: # Docstring inherited from RepoImportBackend.load. # Must ensure we insert in order supported by the universe. @@ -696,7 +697,7 @@ def load( fileDatasets.append(fileDataset) # Ingest everything into the datastore at once. if datastore is not None and fileDatasets: - datastore.ingest(*fileDatasets, transfer=transfer) + datastore.ingest(*fileDatasets, transfer=transfer, record_validation_info=record_validation_info) # Associate datasets with tagged collections. for collection, dataset_ids in self.tagAssociations.items(): self.registry.associate(collection, [self.refsByFileId[i] for i in dataset_ids]) diff --git a/tests/test_cliCmdImport.py b/tests/test_cliCmdImport.py index 3c91f1f579..adbfa0f568 100644 --- a/tests/test_cliCmdImport.py +++ b/tests/test_cliCmdImport.py @@ -42,7 +42,14 @@ class ImportTestCase(CliCmdTestBase, unittest.TestCase): @staticmethod def defaultExpected(): - return dict(repo=None, transfer="auto", directory=None, skip_dimensions=(), export_file=None) + return dict( + repo=None, + transfer="auto", + directory=None, + skip_dimensions=(), + export_file=None, + track_file_attrs=True, + ) @staticmethod def command():