feat: update Metric.read to use _read_header

chore: type hint fix: method name
fulcrumgenomics · Jun 6, 2024 · c8a083c · c8a083c
1 parent 032388d
commit c8a083c
Show file tree

Hide file tree

Showing 2 changed files with 48 additions and 7 deletions.
diff --git a/fgpyo/util/metric.py b/fgpyo/util/metric.py
@@ -192,7 +192,12 @@ def _parsers(cls) -> Dict[type, Callable[[str], Any]]:
         return {}
 
     @classmethod
-    def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator["Metric[MetricType]"]:
+    def read(
+        cls,
+        path: Path,
+        ignore_extra_fields: bool = True,
+        comment_prefix: str = "#",
+    ) -> Iterator["Metric[MetricType]"]:
         """Reads in zero or more metrics from the given path.
 
         The metric file must contain a matching header.
@@ -203,13 +208,23 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator["Metric[
         Args:
             path: the path to the metrics file.
             ignore_extra_fields: True to ignore any extra columns, False to raise an exception.
+            comment_prefix: Any lines beginning with this string will be ignored before parsing the
+                header.
+
+        Raises:
+            ValueError: if the file does not contain a header.
         """
         parsers = cls._parsers()
         with io.to_reader(path) as reader:
-            header: List[str] = reader.readline().rstrip("\r\n").split("\t")
+            try:
+                header = cls._read_header(reader, comment_prefix=comment_prefix)
+                fieldnames: List[str] = header.fieldnames
+            except ValueError:
+                raise ValueError(f"No header found in file: {path}")
+
             # check the header
             class_fields = set(cls.header())
-            file_fields = set(header)
+            file_fields = set(fieldnames)
             missing_from_class = file_fields.difference(class_fields)
             missing_from_file = class_fields.difference(file_fields)
 
@@ -247,15 +262,15 @@ def read(cls, path: Path, ignore_extra_fields: bool = True) -> Iterator["Metric[
                 values: List[str] = line.rstrip("\r\n").split("\t")
 
                 # raise an exception if there aren't the same number of values as the header
-                if len(header) != len(values):
+                if len(fieldnames) != len(values):
                     raise ValueError(
-                        f"In file: {path}, expected {len(header)} columns, got {len(values)} on "
-                        f"line {lineno}: {line}"
+                        f"In file: {path}, expected {len(fieldnames)} columns, "
+                        f"got {len(values)} on line {lineno}: {line}"
                     )
 
                 # build the metric
                 instance: Metric[MetricType] = inspect.attr_from(
-                    cls=cls, kwargs=dict(zip(header, values)), parsers=parsers
+                    cls=cls, kwargs=dict(zip(fieldnames, values)), parsers=parsers
                 )
                 yield instance
 

diff --git a/fgpyo/util/tests/test_metric.py b/fgpyo/util/tests/test_metric.py
@@ -590,3 +590,29 @@ def test_read_header_can_read_picard(tmp_path: Path) -> None:
         header = Metric._read_header(metrics_file, comment_prefix="#")
 
     assert header.fieldnames == ["SAMPLE", "FOO", "BAR"]
+
+
+@pytest.mark.parametrize(
+    "lines",
+    [
+        [],
+        [""],
+        ["# comment"],
+        ["", "# comment"],
+    ],
+)
+def test_read_validates_no_header(tmp_path: Path, lines: List[str]) -> None:
+    """
+    Test our handling of a file with no header.
+
+    1. The helper `Metric._read_header` returns None
+    2. `Metric.read` raises `ValueError`
+    """
+
+    metrics_path = tmp_path / "bad_metrics"
+
+    with metrics_path.open("w") as metrics_file:
+        metrics_file.writelines(lines)
+
+    with pytest.raises(ValueError, match="No header found"):
+        [m for m in dataclasses_data_and_classes.DummyMetric.read(metrics_path)]