Skip to content

Commit

Permalink
Add ability to disable file validation tracking on butler import
Browse files Browse the repository at this point in the history
  • Loading branch information
timj committed Jul 13, 2024
1 parent d760438 commit d8ed3d8
Show file tree
Hide file tree
Showing 9 changed files with 38 additions and 2 deletions.
8 changes: 8 additions & 0 deletions python/lsst/daf/butler/_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1260,6 +1260,7 @@ def import_(
format: str | None = None,
transfer: str | None = None,
skip_dimensions: set | None = None,
record_validation_info: bool = True,
) -> None:
"""Import datasets into this repository that were exported from a
different butler repository via `~lsst.daf.butler.Butler.export`.
Expand All @@ -1285,6 +1286,13 @@ def import_(
Transfer mode passed to `~lsst.daf.butler.Datastore.ingest`.
skip_dimensions : `set`, optional
Names of dimensions that should be skipped and not imported.
record_validation_info : `bool`, optional
If `True`, the default, the datastore can record validation
information associated with the file. If `False`, the datastore
will not attempt to track any information such as checksums
or file sizes. This can be useful if such information is tracked
in an external system or if the file is to be compressed in place.
It is up to the datastore whether this parameter is relevant.
Raises
------
Expand Down
1 change: 1 addition & 0 deletions python/lsst/daf/butler/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def associate(**kwargs: Any) -> None:
metavar=typeStrAcceptsMultiple,
help="Dimensions that should be skipped during import",
)
@track_file_attrs_option()
@options_file_option()
def butler_import(*args: Any, **kwargs: Any) -> None:
"""Import data into a butler repository."""
Expand Down
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/direct_butler/_direct_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1626,6 +1626,7 @@ def import_(
format: str | None = None,
transfer: str | None = None,
skip_dimensions: set | None = None,
record_validation_info: bool = True,
) -> None:
# Docstring inherited.
if not self.isWriteable():
Expand Down Expand Up @@ -1679,6 +1680,7 @@ def doImport(importStream: TextIO | ResourceHandleProtocol) -> None:
directory=directory,
transfer=transfer,
skip_dimensions=skip_dimensions,
record_validation_info=record_validation_info,
)

if isinstance(filename, ResourcePath):
Expand Down
1 change: 1 addition & 0 deletions python/lsst/daf/butler/remote_butler/_remote_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ def import_(
format: str | None = None,
transfer: str | None = None,
skip_dimensions: set | None = None,
record_validation_info: bool = True,
) -> None:
# Docstring inherited.
raise NotImplementedError()
Expand Down
5 changes: 5 additions & 0 deletions python/lsst/daf/butler/script/butlerImport.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def butlerImport(
export_file: str | TextIO | None,
transfer: str | None,
skip_dimensions: Iterable[str] | None,
track_file_attrs: bool = True,
) -> None:
"""Import data into a butler repository.
Expand All @@ -58,6 +59,10 @@ def butlerImport(
The external data transfer type.
skip_dimensions : `~collections.abc.Iterable` [`str`] or `None`
Dimensions that should be skipped.
track_file_attrs : `bool`, optional
Control whether file attributes such as the size or checksum should
be tracked by the datastore. Whether this parameter is honored
depends on the specific datastore implementation.
"""
butler = Butler.from_config(repo, writeable=True)

Expand Down
2 changes: 2 additions & 0 deletions python/lsst/daf/butler/tests/hybrid_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,13 +277,15 @@ def import_(
format: str | None = None,
transfer: str | None = None,
skip_dimensions: set | None = None,
record_validation_info: bool = True,
) -> None:
self._direct_butler.import_(
directory=directory,
filename=filename,
format=format,
transfer=transfer,
skip_dimensions=skip_dimensions,
record_validation_info=record_validation_info,
)

def transfer_dimension_records_from(
Expand Down
9 changes: 9 additions & 0 deletions python/lsst/daf/butler/transfers/_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def load(
directory: ResourcePathExpression | None = None,
transfer: str | None = None,
skip_dimensions: set | None = None,
record_validation_info: bool = True,
) -> None:
"""Import information associated with the backend into the given
registry and datastore.
Expand All @@ -180,5 +181,13 @@ def load(
Dimensions that should be skipped and not imported. This can
be useful when importing into a registry that already knows
about a specific instrument.
record_validation_info : `bool`, optional
If `True`, the default, the datastore can record validation
information associated with the file. If `False`, the datastore
will not attempt to track any information such as checksums
or file sizes. This can be useful if such information is tracked
in an external system or if the file is to be compressed in place.
It is up to the underlying datastore whether this parameter is
relevant.
"""
raise NotImplementedError()
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/transfers/_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,7 @@ def load(
directory: ResourcePathExpression | None = None,
transfer: str | None = None,
skip_dimensions: set | None = None,
record_validation_info: bool = True,
) -> None:
# Docstring inherited from RepoImportBackend.load.
# Must ensure we insert in order supported by the universe.
Expand Down Expand Up @@ -696,7 +697,7 @@ def load(
fileDatasets.append(fileDataset)
# Ingest everything into the datastore at once.
if datastore is not None and fileDatasets:
datastore.ingest(*fileDatasets, transfer=transfer)
datastore.ingest(*fileDatasets, transfer=transfer, record_validation_info=record_validation_info)
# Associate datasets with tagged collections.
for collection, dataset_ids in self.tagAssociations.items():
self.registry.associate(collection, [self.refsByFileId[i] for i in dataset_ids])
Expand Down
9 changes: 8 additions & 1 deletion tests/test_cliCmdImport.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,14 @@ class ImportTestCase(CliCmdTestBase, unittest.TestCase):

@staticmethod
def defaultExpected():
return dict(repo=None, transfer="auto", directory=None, skip_dimensions=(), export_file=None)
return dict(
repo=None,
transfer="auto",
directory=None,
skip_dimensions=(),
export_file=None,
track_file_attrs=True,
)

@staticmethod
def command():
Expand Down

0 comments on commit d8ed3d8

Please sign in to comment.