Skip to content

Commit

Permalink
feat: update Metric.read to use _read_header
Browse files Browse the repository at this point in the history
chore: type hint

fix: method name
  • Loading branch information
msto committed Jun 6, 2024
1 parent 032388d commit c8a083c
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 7 deletions.
29 changes: 22 additions & 7 deletions fgpyo/util/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,12 @@ def _parsers(cls) -> Dict[type, Callable[[str], Any]]:
return {}

@classmethod
def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator["Metric[MetricType]"]:
def read(
cls,
path: Path,
ignore_extra_fields: bool = True,
comment_prefix: str = "#",
) -> Iterator["Metric[MetricType]"]:
"""Reads in zero or more metrics from the given path.
The metric file must contain a matching header.
Expand All @@ -203,13 +208,23 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator["Metric[
Args:
path: the path to the metrics file.
ignore_extra_fields: True to ignore any extra columns, False to raise an exception.
comment_prefix: Any lines beginning with this string will be ignored before parsing the
header.
Raises:
ValueError: if the file does not contain a header.
"""
parsers = cls._parsers()
with io.to_reader(path) as reader:
header: List[str] = reader.readline().rstrip("\r\n").split("\t")
try:
header = cls._read_header(reader, comment_prefix=comment_prefix)
fieldnames: List[str] = header.fieldnames
except ValueError:
raise ValueError(f"No header found in file: {path}")

# check the header
class_fields = set(cls.header())
file_fields = set(header)
file_fields = set(fieldnames)
missing_from_class = file_fields.difference(class_fields)
missing_from_file = class_fields.difference(file_fields)

Expand Down Expand Up @@ -247,15 +262,15 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator["Metric[
values: List[str] = line.rstrip("\r\n").split("\t")

# raise an exception if there aren't the same number of values as the header
if len(header) != len(values):
if len(fieldnames) != len(values):
raise ValueError(
f"In file: {path}, expected {len(header)} columns, got {len(values)} on "
f"line {lineno}: {line}"
f"In file: {path}, expected {len(fieldnames)} columns, "
f"got {len(values)} on line {lineno}: {line}"
)

# build the metric
instance: Metric[MetricType] = inspect.attr_from(
cls=cls, kwargs=dict(zip(header, values)), parsers=parsers
cls=cls, kwargs=dict(zip(fieldnames, values)), parsers=parsers
)
yield instance

Expand Down
26 changes: 26 additions & 0 deletions fgpyo/util/tests/test_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,3 +590,29 @@ def test_read_header_can_read_picard(tmp_path: Path) -> None:
header = Metric._read_header(metrics_file, comment_prefix="#")

assert header.fieldnames == ["SAMPLE", "FOO", "BAR"]


@pytest.mark.parametrize(
"lines",
[
[],
[""],
["# comment"],
["", "# comment"],
],
)
def test_read_validates_no_header(tmp_path: Path, lines: List[str]) -> None:
"""
Test our handling of a file with no header.
1. The helper `Metric._read_header` returns None
2. `Metric.read` raises `ValueError`
"""

metrics_path = tmp_path / "bad_metrics"

with metrics_path.open("w") as metrics_file:
metrics_file.writelines(lines)

with pytest.raises(ValueError, match="No header found"):
[m for m in dataclasses_data_and_classes.DummyMetric.read(metrics_path)]

0 comments on commit c8a083c

Please sign in to comment.