From e0748aacd443cd8ca8aa46a4c8baf29a9b2159e9 Mon Sep 17 00:00:00 2001
From: clintval
Date: Mon, 21 Aug 2023 17:00:34 -0700
Subject: [PATCH] feat: offer GZIP support for Metric IO

---
 fgpyo/util/metric.py            |  5 +++--
 fgpyo/util/tests/test_metric.py | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fgpyo/util/metric.py b/fgpyo/util/metric.py
index 9379114c..2bff416f 100644
--- a/fgpyo/util/metric.py
+++ b/fgpyo/util/metric.py
@@ -115,6 +115,7 @@
 
 import attr
 
+from fgpyo import io
 from fgpyo.util import inspect
 
 MetricType = TypeVar("MetricType")
@@ -161,7 +162,7 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator[Any]:
             ignore_extra_fields: True to ignore any extra columns, False to raise an exception.
         """
         parsers = cls._parsers()
-        with path.open("r") as reader:
+        with io.to_reader(path) as reader:
             header: List[str] = reader.readline().rstrip("\r\n").split("\t")
             # check the header
             class_fields = set(cls.header())
@@ -234,7 +235,7 @@ def write(cls, path: Path, *values: MetricType) -> None:
             path: path to the output file
             values: zero or more metrics.
         """
-        with path.open("w") as writer:
+        with io.to_writer(path) as writer:
             writer.write("\t".join(cls.header()))
             writer.write("\n")
             for value in values:
diff --git a/fgpyo/util/tests/test_metric.py b/fgpyo/util/tests/test_metric.py
index ab8fd8ad..44c9068a 100644
--- a/fgpyo/util/tests/test_metric.py
+++ b/fgpyo/util/tests/test_metric.py
@@ -1,4 +1,5 @@
 import enum
+import gzip
 from pathlib import Path
 from typing import Any
 from typing import Callable
@@ -158,6 +159,20 @@ def test_metrics_roundtrip(tmpdir: TmpDir) -> None:
     assert metrics == DUMMY_METRICS
 
 
+def test_metric_roundtrip_gzip(tmp_path: Path) -> None:
+    path: Path = Path(tmp_path) / "metrics.txt.gz"
+
+    DummyMetric.write(path, *DUMMY_METRICS)
+
+    with gzip.open(path, "r") as handle:
+        handle.read(1)  # Will raise an exception if not a GZIP file.
+
+    metrics: List[DummyMetric] = list(DummyMetric.read(path=path))
+
+    assert len(metrics) == len(DUMMY_METRICS)
+    assert metrics == DUMMY_METRICS
+
+
 def test_metrics_read_extra_columns(tmpdir: TmpDir) -> None:
     person = Person(name="Max", age=42)
     path = Path(tmpdir) / "metrics.txt"