Commit: Write V2 formatters

timj committed May 18, 2024
1 parent 2d69742 · commit f68a111
Showing 9 changed files with 568 additions and 6 deletions.
33 changes: 32 additions & 1 deletion python/lsst/daf/butler/formatters/astropyTable.py
@@ -30,10 +30,41 @@
import os.path
from typing import Any

import astropy.table
from lsst.daf.butler import FormatterV2
from lsst.resources import ResourcePath

from .file import FileFormatter


class AstropyTableFormatter(FileFormatter):
class AstropyTableFormatter(FormatterV2):
"""Read and write `astropy.table.Table` objects.
Currently assumes only local file reads are possible.
"""

supported_write_parameters = frozenset({"format"})
supported_extensions = frozenset({".ecsv"})

def get_write_extension(self) -> str:
# Default to ECSV but allow configuration via write parameter
format = self.write_parameters.get("format", "ecsv")
if format == "ecsv":
return ".ecsv"
# Other supported formats can be added here
raise RuntimeError(f"Requested file format '{format}' is not supported for Table")

def read_local_file(self, local_uri: ResourcePath, component: str | None = None) -> Any:
pytype = self.file_descriptor.storageClass.pytype
if not issubclass(pytype, astropy.table.Table):
raise TypeError(f"Python type {pytype} does not seem to be an astropy Table type")

return pytype.read(local_uri.ospath) # type: ignore

def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None:
in_memory_dataset.write(uri.ospath)
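
For context, a standalone sketch of the astropy round trip the new formatter delegates to (plain astropy, no butler machinery; the file name is illustrative):

import astropy.table

# Build a small table and write it as ECSV, the formatter's default extension.
table = astropy.table.Table({"a": [1, 2, 3], "b": ["x", "y", "z"]})
table.write("example.ecsv")  # format inferred from the .ecsv extension

# Reading mirrors read_local_file(): the storage class pytype's read() method.
restored = astropy.table.Table.read("example.ecsv")
assert (restored["a"] == table["a"]).all()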


class AstropyTableFormatterV1(FileFormatter):
"""Interface for reading and writing astropy.Table objects
in either ECSV or FITS format.
"""
Expand Down
60 changes: 59 additions & 1 deletion python/lsst/daf/butler/formatters/json.py
@@ -34,10 +34,68 @@
import json
from typing import Any

from lsst.resources import ResourcePath

from .file import FileFormatter
from .typeless import TypelessFormatter


class JsonFormatter(TypelessFormatter):
"""Read and write JSON files."""

allow_remote_file_read = True
default_extension = ".json"
unsupported_parameters = None

def read_cached_file(self, uri: ResourcePath, component: str | None = None) -> Any:
# json.load() reads the entire file content into memory
# and is no different from json.loads(uri.read()). It does not attempt
# to support incremental reading to minimize memory usage.
# This means the JSON string always has to be read entirely into
# memory regardless of being remote or local.
json_bytes = uri.read()

try:
data = json.loads(json_bytes)
except json.JSONDecodeError:
data = None

return data

def to_bytes(self, in_memory_dataset: Any) -> bytes:
"""Write the in memory dataset to a bytestring.
Parameters
----------
in_memory_dataset : `object`
Object to serialize.
Returns
-------
serialized_dataset : `bytes`
Bytes representing the serialized dataset.
Raises
------
Exception
The object could not be serialized.
"""
# Try different standardized methods for native json.
# For example, Pydantic models have a .model_dump_json method.
# v1 models without compatibility layer will need .json()
with contextlib.suppress(AttributeError):
return in_memory_dataset.model_dump_json().encode()
with contextlib.suppress(AttributeError):
return in_memory_dataset.json().encode()

if dataclasses.is_dataclass(in_memory_dataset):
in_memory_dataset = dataclasses.asdict(in_memory_dataset)
elif hasattr(in_memory_dataset, "_asdict"):
in_memory_dataset = in_memory_dataset._asdict()
return json.dumps(in_memory_dataset, ensure_ascii=False).encode()
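
The fallback chain above can be exercised with standard-library types alone; a minimal sketch (the Point and Size classes are illustrative):

import dataclasses
import json
from typing import NamedTuple

@dataclasses.dataclass
class Point:
    x: int
    y: int

class Size(NamedTuple):
    width: int
    height: int

# Dataclasses are converted via dataclasses.asdict() ...
print(json.dumps(dataclasses.asdict(Point(1, 2))))  # {"x": 1, "y": 2}
# ... and named tuples via their _asdict() method.
print(json.dumps(Size(3, 4)._asdict()))  # {"width": 3, "height": 4}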


class JsonFormatter(FileFormatter):
class JsonFormatterV1(FileFormatter):
"""Formatter implementation for JSON files."""

extension = ".json"
39 changes: 38 additions & 1 deletion python/lsst/daf/butler/formatters/logs.py
@@ -29,12 +29,49 @@

from typing import Any

from lsst.daf.butler import FormatterV2
from lsst.daf.butler.logging import ButlerLogRecords
from lsst.resources import ResourcePath

from .json import JsonFormatter


class ButlerLogRecordsFormatter(JsonFormatter):
class ButlerLogRecordsFormatter(FormatterV2):
"""Read and write log records in JSON format.
This is a naive implementation that treats everything as a pydantic.
model. In the future this may be changed to be able to read
`ButlerLogRecord` one at time from the file and return a subset
of records given some filtering parameters.
"""

# Log files can be large and ResourcePath.open() does not support
# readline() or __iter__ in all cases, and ButlerLogRecords.from_stream
# does not use `.read()` for chunking. A local file must therefore
# be used.
allow_remote_file_read = False

default_extension = ".json"
supported_extensions = frozenset({".log"})

def _get_read_pytype(self) -> type[ButlerLogRecords]:
"""Get the Python type to allow for subclasses."""
pytype = self.file_descriptor.storageClass.pytype
if not issubclass(pytype, ButlerLogRecords):
raise RuntimeError(f"Python type {pytype} does not seem to be a ButlerLogRecords type")

return pytype

def read_local_file(self, uri: ResourcePath, component: str | None = None) -> Any:
# ResourcePath open() cannot do a per-line read.
return self._get_read_pytype().from_file(uri.ospath)

def read_from_bytes(self, serialized_bytes: bytes, component: str | None = None) -> Any:
return self._get_read_pytype().from_raw(serialized_bytes)

def to_bytes(self, in_memory_dataset: Any) -> bytes:
return in_memory_dataset.model_dump_json(exclude_unset=True, exclude_defaults=True).encode()
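
The effect of the two exclude flags can be seen with a toy pydantic v2 model (a sketch; the Record model is illustrative, not part of the butler):

from pydantic import BaseModel

class Record(BaseModel):
    level: str = "INFO"
    message: str = ""

r = Record(message="started")
# All fields are serialized by default ...
print(r.model_dump_json())  # {"level":"INFO","message":"started"}
# ... but unset and default-valued fields can be dropped to keep records small.
print(r.model_dump_json(exclude_unset=True, exclude_defaults=True))  # {"message":"started"}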


class ButlerLogRecordsFormatterV1(JsonFormatter):
"""Read and write log records in JSON format.
This is a naive implementation that treats everything as a pydantic.
22 changes: 21 additions & 1 deletion python/lsst/daf/butler/formatters/matplotlib.py
@@ -33,10 +33,30 @@

from typing import Any

from lsst.daf.butler import FormatterV2
from lsst.resources import ResourcePath

from .file import FileFormatter


class MatplotlibFormatter(FileFormatter):
class MatplotlibFormatter(FormatterV2):
"""Format matplotlib figures.
Does not support writes.
"""

default_extension = ".png"

def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None:
# The format is not forced, so if there is no extension in uri
# matplotlib will add one and the datastore will not know what happened.
# The fname argument of savefig can take a file descriptor. If that
# works with ResourcePath handles then it may be possible to do direct
# writes. Alternatively, implement with BytesIO and do a direct put.
in_memory_dataset.savefig(uri.ospath)
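
The BytesIO alternative mentioned in the comment would look roughly like this (a sketch; whether a direct put beats the local-file path is the open question the comment raises):

import io

import matplotlib

matplotlib.use("Agg")  # non-interactive backend for the sketch
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])

# savefig() accepts a file-like object when the format is given explicitly,
# so the PNG bytes never have to touch the local filesystem.
buffer = io.BytesIO()
fig.savefig(buffer, format="png")
png_bytes = buffer.getvalue()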


class MatplotlibFormatterV1(FileFormatter):
"""Interface for writing matplotlib figures."""

extension = ".png"
55 changes: 55 additions & 0 deletions python/lsst/daf/butler/formatters/packages.py
@@ -30,10 +30,65 @@
import os.path
from typing import Any

from lsst.daf.butler import FormatterV2
from lsst.daf.butler.formatters.file import FileFormatter
from lsst.resources import ResourcePath
from lsst.utils.packages import Packages


class PackagesFormatterV2(FormatterV2):
"""Interface for reading and writing `~lsst.utils.packages.Packages`.
This formatter supports write parameters:
* ``format``: The file format to use to write the package data. Allowed
options are ``yaml``, ``json``, and ``pickle``.
"""

allow_remote_file_read = True
supported_write_parameters = frozenset({"format"})
supported_extensions = frozenset({".yaml", ".pickle", ".pkl", ".json"})

def get_write_extension(self) -> str:
# Default to YAML but allow configuration via write parameter
format = self.write_parameters.get("format", "yaml")
ext = "." + format

if ext not in self.supported_extensions:
raise RuntimeError(f"Requested file format '{format}' is not supported for Packages")
return ext

def read_cached_file(self, uri: ResourcePath, component: str | None = None) -> Any:
# Read the full file using the class associated with the
# storage class it was originally written with.
# The bytes are read directly from the resource; package files
# are not going to be large.
pytype = self.file_descriptor.storageClass.pytype
assert issubclass(pytype, Packages) # for mypy
format = uri.getExtension().lstrip(".") # .yaml -> yaml
return pytype.fromBytes(uri.read(), format)

def to_bytes(self, in_memory_dataset: Any) -> bytes:
"""Write the in memory dataset to a bytestring.
Parameters
----------
in_memory_dataset : `object`
Object to serialize.
Returns
-------
serializedDataset : `bytes`
YAML string encoded to bytes.
Raises
------
Exception
The object could not be serialized.
"""
format = self.get_write_extension().lstrip(".")
return in_memory_dataset.toBytes(format)
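
A round trip through the two methods the formatter relies on, as a sketch (this assumes Packages.fromSystem() is the usual way to build an instance; only toBytes()/fromBytes() appear in the formatter itself):

from lsst.utils.packages import Packages

packages = Packages.fromSystem()  # snapshot of the current software environment
data = packages.toBytes("yaml")  # what to_bytes() produces for format="yaml"
restored = Packages.fromBytes(data, "yaml")  # what read_cached_file() does on read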


class PackagesFormatter(FileFormatter):
"""Interface for reading and writing `~lsst.utils.packages.Packages`.
27 changes: 26 additions & 1 deletion python/lsst/daf/butler/formatters/pickle.py
@@ -34,10 +34,35 @@
import pickle
from typing import Any

from lsst.resources import ResourcePath

from .file import FileFormatter
from .typeless import TypelessFormatter


class PickleFormatter(TypelessFormatter):
"""Interface for reading and writing Python objects to and from pickle
files.
"""

allow_remote_file_read = True
default_extension = ".pickle"
unsupported_parameters = None

def read_cached_file(self, uri: ResourcePath, component: str | None = None) -> Any:
# Read the pickle file directly from the resource into memory.
try:
data = pickle.loads(uri.read())
except pickle.UnpicklingError:
data = None

return data

def to_bytes(self, in_memory_dataset: Any) -> bytes:
return pickle.dumps(in_memory_dataset, protocol=-1)
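
Here protocol=-1 selects the highest protocol available to the running Python; a quick sketch of the round trip:

import pickle

payload = {"values": [1, 2, 3], "name": "example"}
data = pickle.dumps(payload, protocol=-1)  # -1 is shorthand for pickle.HIGHEST_PROTOCOL
assert pickle.loads(data) == payload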


class PickleFormatter(FileFormatter):
class PickleFormatterV1(FileFormatter):
"""Interface for reading and writing Python objects to and from pickle
files.
"""