From 031486cb4cc22347f0f72e5b8422cc123f34b147 Mon Sep 17 00:00:00 2001 From: clintval Date: Mon, 21 Aug 2023 16:28:22 -0700 Subject: [PATCH] feat: offer GZIP support for Metric IO --- fgpyo/fasta/tests/test_builder.py | 5 ++--- fgpyo/sam/tests/test_sam.py | 9 ++++---- fgpyo/util/metric.py | 5 +++-- fgpyo/util/tests/test_metric.py | 36 +++++++++++++++++++++---------- 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/fgpyo/fasta/tests/test_builder.py b/fgpyo/fasta/tests/test_builder.py index 950295c0..9d6a6fd5 100644 --- a/fgpyo/fasta/tests/test_builder.py +++ b/fgpyo/fasta/tests/test_builder.py @@ -4,7 +4,6 @@ from tempfile import NamedTemporaryFile as NamedTemp import pytest -from py._path.local import LocalPath as TmpDir from pytest import raises from fgpyo.fasta.builder import FastaBuilder @@ -64,14 +63,14 @@ def test_bases_string_from_ContigBuilder_add( bases: str, times: int, expected: str, - tmpdir: TmpDir, + tmp_path: Path, ) -> None: """ Reads bases back from fasta and checks that extra spaces are removed and bases are uppercase """ builder = FastaBuilder() builder.add(name).add(bases, times) - with NamedTemp(suffix=".fa", dir=tmpdir, mode="w", delete=True) as fp: + with NamedTemp(suffix=".fa", dir=tmp_path, mode="w", delete=True) as fp: builder.to_file(Path(fp.name)) with open(fp.name, "r") as read_fp: for line in read_fp.readlines(): diff --git a/fgpyo/sam/tests/test_sam.py b/fgpyo/sam/tests/test_sam.py index 80463b1d..b05ea853 100755 --- a/fgpyo/sam/tests/test_sam.py +++ b/fgpyo/sam/tests/test_sam.py @@ -10,7 +10,6 @@ import pysam import pytest -from py._path.local import LocalPath as TmpDir from pysam import AlignmentHeader import fgpyo.sam as sam @@ -141,10 +140,10 @@ def test_sam_file_open_writing( file_type: SamFileType, expected_records: List[pysam.AlignedSegment], header_dict: AlignmentHeader, - tmpdir: TmpDir, + tmp_path: Path, ) -> None: # use header as a keyword argument - with NamedTemp(suffix=file_type.extension, dir=tmpdir, mode="w", delete=False) as fp: + with NamedTemp(suffix=file_type.extension, dir=tmp_path, mode="w", delete=False) as fp: kwargs = {"header": header_dict} with sam._pysam_open( path=fp.file, open_for_reading=False, file_type=file_type, **kwargs # type: ignore @@ -155,11 +154,11 @@ def test_sam_file_open_writing( def test_sam_file_open_writing_header_keyword( - expected_records: List[pysam.AlignedSegment], header_dict: AlignmentHeader, tmpdir: TmpDir + expected_records: List[pysam.AlignedSegment], header_dict: AlignmentHeader, tmp_path: Path ) -> None: # Use SamWriter # use header as a keyword argument - with NamedTemp(suffix=".sam", dir=tmpdir, mode="w", delete=False) as fp: + with NamedTemp(suffix=".sam", dir=tmp_path, mode="w", delete=False) as fp: with sam.writer(path=fp.name, header=header_dict, file_type=SamFileType.SAM) as sam_writer: for r in expected_records: sam_writer.write(r) diff --git a/fgpyo/util/metric.py b/fgpyo/util/metric.py index 9379114c..2bff416f 100644 --- a/fgpyo/util/metric.py +++ b/fgpyo/util/metric.py @@ -115,6 +115,7 @@ import attr +from fgpyo import io from fgpyo.util import inspect MetricType = TypeVar("MetricType") @@ -161,7 +162,7 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator[Any]: ignore_extra_fields: True to ignore any extra columns, False to raise an exception. """ parsers = cls._parsers() - with path.open("r") as reader: + with io.to_reader(path) as reader: header: List[str] = reader.readline().rstrip("\r\n").split("\t") # check the header class_fields = set(cls.header()) @@ -234,7 +235,7 @@ def write(cls, path: Path, *values: MetricType) -> None: path: path to the output file values: zero or more metrics. """ - with path.open("w") as writer: + with io.to_writer(path) as writer: writer.write("\t".join(cls.header())) writer.write("\n") for value in values: diff --git a/fgpyo/util/tests/test_metric.py b/fgpyo/util/tests/test_metric.py index ab8fd8ad..14b2e13d 100644 --- a/fgpyo/util/tests/test_metric.py +++ b/fgpyo/util/tests/test_metric.py @@ -1,4 +1,5 @@ import enum +import gzip from pathlib import Path from typing import Any from typing import Callable @@ -10,7 +11,6 @@ import attr import pytest -from py._path.local import LocalPath as TmpDir from fgpyo.util.metric import Metric @@ -138,8 +138,8 @@ class PersonDefault(Metric["PersonDefault"]): @pytest.mark.parametrize("metric", DUMMY_METRICS) -def test_metric_roundtrip(tmpdir: TmpDir, metric: DummyMetric) -> None: - path: Path = Path(tmpdir) / "metrics.txt" +def test_metric_roundtrip(tmp_path: Path, metric: DummyMetric) -> None: + path: Path = Path(tmp_path) / "metrics.txt" DummyMetric.write(path, metric) metrics: List[DummyMetric] = list(DummyMetric.read(path=path)) @@ -148,8 +148,8 @@ def test_metric_roundtrip(tmpdir: TmpDir, metric: DummyMetric) -> None: assert metrics[0] == metric -def test_metrics_roundtrip(tmpdir: TmpDir) -> None: - path: Path = Path(tmpdir) / "metrics.txt" +def test_metrics_roundtrip(tmp_path: Path) -> None: + path: Path = Path(tmp_path) / "metrics.txt" DummyMetric.write(path, *DUMMY_METRICS) metrics: List[DummyMetric] = list(DummyMetric.read(path=path)) @@ -158,9 +158,23 @@ def test_metrics_roundtrip(tmpdir: TmpDir) -> None: assert metrics == DUMMY_METRICS -def test_metrics_read_extra_columns(tmpdir: TmpDir) -> None: +def test_metric_roundtrip_gzip(tmp_path: Path) -> None: + path: Path = Path(tmp_path) / "metrics.txt.gz" + + DummyMetric.write(path, *DUMMY_METRICS) + + with gzip.open(path, "r") as handle: + handle.read(1) # Will raise an exception if not a GZIP file. + + metrics: List[DummyMetric] = list(DummyMetric.read(path=path)) + + assert len(metrics) == len(DUMMY_METRICS) + assert metrics == DUMMY_METRICS + + +def test_metrics_read_extra_columns(tmp_path: Path) -> None: person = Person(name="Max", age=42) - path = Path(tmpdir) / "metrics.txt" + path = Path(tmp_path) / "metrics.txt" with path.open("w") as writer: header = Person.header() header.append("foo") @@ -173,9 +187,9 @@ def test_metrics_read_extra_columns(tmpdir: TmpDir) -> None: list(Person.read(path=path, ignore_extra_fields=False)) -def test_metrics_read_missing_optional_columns(tmpdir: TmpDir) -> None: +def test_metrics_read_missing_optional_columns(tmp_path: Path) -> None: person = PersonMaybeAge(name="Max", age=None) - path = Path(tmpdir) / "metrics.txt" + path = Path(tmp_path) / "metrics.txt" # The "age" column is optional, and not in the file, but that's ok with path.open("w") as writer: @@ -189,9 +203,9 @@ def test_metrics_read_missing_optional_columns(tmpdir: TmpDir) -> None: list(PersonMaybeAge.read(path=path)) -def test_metric_read_missing_column_with_default(tmpdir: TmpDir) -> None: +def test_metric_read_missing_column_with_default(tmp_path: Path) -> None: person = PersonDefault(name="Max") - path = Path(tmpdir) / "metrics.txt" + path = Path(tmp_path) / "metrics.txt" # The "age" column hs a default, and not in the file, but that's ok with path.open("w") as writer: