Skip to content

Commit

Permalink
WIP: Formatter V2 cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
timj committed Jun 8, 2024
1 parent 73a6edb commit e271398
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 130 deletions.
224 changes: 145 additions & 79 deletions python/lsst/daf/butler/_formatter.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion python/lsst/daf/butler/formatters/astropyTable.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def get_write_extension(self) -> str:
# Other supported formats can be added here
raise RuntimeError(f"Requested file format '{format}' is not supported for Table")

Check warning on line 55 in python/lsst/daf/butler/formatters/astropyTable.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/formatters/astropyTable.py#L55

Added line #L55 was not covered by tests

def read_local_file(self, local_uri: ResourcePath, component: str | None = None) -> Any:
def read_from_local_file(self, local_uri: ResourcePath, component: str | None = None) -> Any:
pytype = self.file_descriptor.storageClass.pytype
if not issubclass(pytype, astropy.table.Table):
raise TypeError(f"Python type {pytype} does not seem to be a astropy Table type")

Check warning on line 60 in python/lsst/daf/butler/formatters/astropyTable.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/formatters/astropyTable.py#L60

Added line #L60 was not covered by tests
Expand Down
3 changes: 1 addition & 2 deletions python/lsst/daf/butler/formatters/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,10 @@
class JsonFormatter(TypelessFormatter):
"""Read and write JSON files."""

allow_remote_file_read = True
default_extension = ".json"
unsupported_parameters = None

def read_cached_file(self, uri: ResourcePath, component: str | None = None) -> Any:
def read_from_uri(self, uri: ResourcePath, component: str | None = None) -> Any:
# json.load() reads the entire file content into memory
# and is no different from json.loads(uri.read()). It does not attempt
# to support incremental reading to minimize memory usage.
Expand Down
14 changes: 8 additions & 6 deletions python/lsst/daf/butler/formatters/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,14 @@ class ButlerLogRecordsFormatter(FormatterV2):
model. In the future this may be changed to be able to read
`ButlerLogRecord` one at time from the file and return a subset
of records given some filtering parameters.
"""
# Log files can be large and ResourcePath.open() does not support
# readline() or __iter__ in all cases and ButlerLogRecords.from_stream
# does not use `.read()` for chunking. Therefore must use local file.
allow_remote_file_read = False
Notes
-----
Log files can be large and ResourcePath.open() does not support
``readline()`` or ``__iter__`` in all cases and
``ButlerLogRecords.from_stream`` does not use `.read()` for chunking.
Therefore must use local file.
"""

default_extension = ".json"
supported_extensions = frozenset({".log"})
Expand All @@ -60,7 +62,7 @@ def _get_read_pytype(self) -> type[ButlerLogRecords]:
raise RuntimeError(f"Python type {pytype} does not seem to be a ButlerLogRecords type")

Check warning on line 62 in python/lsst/daf/butler/formatters/logs.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/formatters/logs.py#L62

Added line #L62 was not covered by tests
return pytype

def read_local_file(self, uri: ResourcePath, component: str | None = None) -> Any:
def read_from_local_file(self, uri: ResourcePath, component: str | None = None) -> Any:
# ResourcePath open() cannot do a per-line read.
return self._get_read_pytype().from_file(uri.ospath)

Check warning on line 67 in python/lsst/daf/butler/formatters/logs.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/formatters/logs.py#L67

Added line #L67 was not covered by tests

Expand Down
3 changes: 1 addition & 2 deletions python/lsst/daf/butler/formatters/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ class PackagesFormatterV2(FormatterV2):
options are ``yaml``, ``json``, and ``pickle``.
"""

allow_remote_file_read = True
supported_write_parameters = frozenset({"format"})
supported_extensions = frozenset({".yaml", ".pickle", ".pkl", ".json"})

Expand All @@ -57,7 +56,7 @@ def get_write_extension(self) -> str:
raise RuntimeError(f"Requested file format '{format}' is not supported for Packages")
return ext

Check warning on line 57 in python/lsst/daf/butler/formatters/packages.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/formatters/packages.py#L56-L57

Added lines #L56 - L57 were not covered by tests

def read_cached_file(self, uri: ResourcePath, component: str | None = None) -> Any:
def read_from_uri(self, uri: ResourcePath, component: str | None = None) -> Any:
# Read the full file using the class associated with the
# storage class it was originally written with.
# Read the bytes directly from resource. These are not going to be
Expand Down
3 changes: 1 addition & 2 deletions python/lsst/daf/butler/formatters/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,10 @@ class PickleFormatter(TypelessFormatter):
files.
"""

allow_remote_file_read = True
default_extension = ".pickle"
unsupported_parameters = None

def read_cached_file(self, uri: ResourcePath, component: str | None = None) -> Any:
def read_from_uri(self, uri: ResourcePath, component: str | None = None) -> Any:
# Read the pickle file directly from the resource into memory.
try:
data = pickle.loads(uri.read())
Expand Down
11 changes: 6 additions & 5 deletions python/lsst/daf/butler/formatters/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,11 @@
class YamlFormatter(TypelessFormatter):
"""Read and write YAML files."""

allow_remote_file_read = True
default_extension = ".yaml"
unsupported_parameters = None
supported_write_parameters = frozenset({"unsafe_dump"})

def read_cached_file(self, uri: ResourcePath, component: str | None = None) -> Any:
def read_from_uri(self, uri: ResourcePath, component: str | None = None) -> Any:
# Can not use ResourcePath.open()
data = yaml.safe_load(uri.read())
return data
Expand Down Expand Up @@ -86,7 +85,7 @@ def to_bytes(self, in_memory_dataset: Any) -> bytes:
"""
converted = False
if hasattr(in_memory_dataset, "model_dump") and hasattr(in_memory_dataset, "model_dump_json"):
# Pydantic v2-like model if both model_dump() and model_json()
# Pydantic v2-like model if both model_dump() and model_dump_json()
# exist.
with contextlib.suppress(Exception):
in_memory_dataset = in_memory_dataset.model_dump()
Expand All @@ -105,10 +104,12 @@ def to_bytes(self, in_memory_dataset: Any) -> bytes:
in_memory_dataset = in_memory_dataset._asdict()

unsafe_dump = self.write_parameters.get("unsafe_dump", False)
        # Now that Python always uses an ordered dict, do not sort keys
        # on write so that order can be preserved on read.
if unsafe_dump:
serialized = yaml.dump(in_memory_dataset)
serialized = yaml.dump(in_memory_dataset, sort_keys=False)

Check warning on line 110 in python/lsst/daf/butler/formatters/yaml.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/formatters/yaml.py#L110

Added line #L110 was not covered by tests
else:
serialized = yaml.safe_dump(in_memory_dataset)
serialized = yaml.safe_dump(in_memory_dataset, sort_keys=False)
return serialized.encode()


Expand Down
46 changes: 13 additions & 33 deletions python/lsst/daf/butler/tests/_datasetsHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,14 @@
from collections.abc import Iterable, Mapping
from typing import TYPE_CHECKING, Any

from lsst.daf.butler import DataCoordinate, DatasetRef, DatasetType, DimensionGroup, StorageClass
from lsst.daf.butler import (
DataCoordinate,
DatasetRef,
DatasetType,
DimensionGroup,
FormatterNotImplementedError,
StorageClass,
)
from lsst.daf.butler.datastore import Datastore
from lsst.daf.butler.formatters.yaml import YamlFormatter
from lsst.resources import ResourcePath
Expand Down Expand Up @@ -184,11 +191,11 @@ def makeDatastore(self, sub: str | None = None) -> Datastore:
class BadWriteFormatter(YamlFormatter):
"""A formatter that never works but does leave a file behind."""

def read_cached_file(self, uri: ResourcePath, component: str | None = None) -> Any:
raise NotImplementedError("This formatter can not read anything")
def read_from_uri(self, uri: ResourcePath, component: str | None = None) -> Any:
raise FormatterNotImplementedError("This formatter can not read anything")

Check warning on line 195 in python/lsst/daf/butler/tests/_datasetsHelper.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/tests/_datasetsHelper.py#L195

Added line #L195 was not covered by tests

def to_bytes(self, in_memory_dataset: Any) -> bytes:
raise NotImplementedError("This formatter can not serialize to bytes.")
raise FormatterNotImplementedError("This formatter can not serialize to bytes.")

Check warning on line 198 in python/lsst/daf/butler/tests/_datasetsHelper.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/tests/_datasetsHelper.py#L198

Added line #L198 was not covered by tests

def write_direct(
self,
Expand All @@ -199,22 +206,10 @@ def write_direct(
uri.write(b"")
raise RuntimeError("Did not succeed in writing file.")

def _readFile(self, path: str, pytype: type[Any] | None = None) -> Any:
raise NotImplementedError("This formatter can not read anything")

def _writeFile(self, inMemoryDataset: Any) -> None:
"""Write an empty file and then raise an exception."""
with open(self.fileDescriptor.location.path, "wb"):
pass
raise RuntimeError("Did not succeed in writing file")


class BadNoWriteFormatter(BadWriteFormatter):
"""A formatter that always fails without writing anything."""

def _writeFile(self, inMemoryDataset: Any) -> None:
raise RuntimeError("Did not writing anything at all")

def write_direct(
self,
in_memory_dataset: Any,
Expand All @@ -227,31 +222,16 @@ def write_direct(
class MultiDetectorFormatter(YamlFormatter):
"""A formatter that requires a detector to be specified in the dataID."""

def read_cached_file(self, uri: ResourcePath, component: str | None = None) -> Any:
def read_from_uri(self, uri: ResourcePath, component: str | None = None) -> Any:
if self.data_id is None:
raise RuntimeError("This formatter requires a dataId")
if "detector" not in self.data_id:
raise RuntimeError("This formatter requires detector to be present in dataId")

key = f"detector{self.data_id['detector']}"

data = super().read_cached_file(uri, component)
data = super().read_from_uri(uri, component)
if key not in data:
raise RuntimeError(f"Could not find '{key}' in data file.")

Check warning on line 235 in python/lsst/daf/butler/tests/_datasetsHelper.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler/tests/_datasetsHelper.py#L235

Added line #L235 was not covered by tests

return data[key]

def _writeFile(self, inMemoryDataset: Any) -> None:
raise NotImplementedError("Can not write")

def _fromBytes(self, serializedDataset: bytes, pytype: type[Any] | None = None) -> Any:
data = super()._fromBytes(serializedDataset)
if self.dataId is None:
raise RuntimeError("This formatter requires a dataId")
if "detector" not in self.dataId:
raise RuntimeError("This formatter requires detector to be present in dataId")
key = f"detector{self.dataId['detector']}"
assert pytype is not None
if key in data:
return pytype(data[key])
raise RuntimeError(f"Could not find '{key}' in data file")

0 comments on commit e271398

Please sign in to comment.