feat: add the skip_extra argument to Metric.read

This allows the input metric files to have additional columns. Fixes #49.
fulcrumgenomics · Aug 4, 2023 · a2eb1b7 · a2eb1b7
1 parent 0c17fe0
commit a2eb1b7
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 5 deletions.
diff --git a/fgpyo/util/metric.py b/fgpyo/util/metric.py
@@ -148,18 +148,25 @@ def _parsers(cls) -> Dict[type, Callable[[str], Any]]:
         return {}
 
     @classmethod
-    def read(cls, path: Path) -> Iterator[Any]:
+    def read(cls, path: Path, skip_extra: bool = True) -> Iterator[Any]:
         """Reads in zero or more metrics from the given path.
 
         The metric file must contain a matching header.
 
         Args:
             path: the path to the metrics file.
+            skip_extra: True to ignore any extra columns, False to raise an exception.
         """
         parsers = cls._parsers()
         with path.open("r") as reader:
             header: List[str] = reader.readline().rstrip("\r\n").split("\t")
-            assert header == cls.header(), "Header did not match"
+            cls_header = cls.header()
+            # check the header
+            for field in cls_header:
+                assert field in header, f"Missing field '{field}' in file: {path}"
+            if not skip_extra:
+                for field in header:
+                    assert field in cls_header, f"Extra field '{field}' in file: {path}"
             for line in reader:
                 fields: List[str] = line.rstrip("\r\n").split("\t")
                 instance: Metric[MetricType] = inspect.attr_from(
@@ -208,7 +215,7 @@ def format_value(cls, value: Any) -> str:
         `str` to all others.
 
         Dictionaries / mappings will have keys and vals separated by semicolons, and key val pairs
-        pairs delimited by commas.
+        delimited by commas.
 
         In addition, lists will be flanked with '[]', tuples with '()' and sets and dictionaries
         with '{}'

diff --git a/fgpyo/util/tests/test_metric.py b/fgpyo/util/tests/test_metric.py
@@ -146,6 +146,21 @@ def test_metrics_roundtrip(tmpdir: TmpDir) -> None:
     assert metrics == DUMMY_METRICS
 
 
+def test_metrics_read_extra_columns(tmpdir: TmpDir) -> None:
+    person = Person(name="Max", age=42)
+    path = Path(tmpdir) / "metrics.txt"
+    with path.open("w") as writer:
+        header = Person.header()
+        header.append("foo")  # extra column that Person does not declare
+        writer.write("\t".join(header) + "\n")
+        writer.write(f"{person.name}\t{person.age}\tbar\n")
+
+    assert list(Person.read(path=path)) == [person]
+    assert list(Person.read(path=path, skip_extra=True)) == [person]
+    with pytest.raises(AssertionError):
+        list(Person.read(path=path, skip_extra=False))  # strict mode must reject "foo"
+
+
 def test_metric_header() -> None:
     assert DummyMetric.header() == [
         "int_value",
@@ -186,7 +201,7 @@ def test_metric_custom_formatter() -> None:
     assert list(person.formatted_values()) == ["john doe", "42"]
 
 
-def test_metric_parse_with_None() -> None:
+def test_metric_parse_with_none() -> None:
     assert Person.parse(fields=["", "40"]) == Person(name=None, age=40)
     assert Person.parse(fields=["Sally", ""]) == Person(name="Sally", age=None)
     assert Person.parse(fields=["", ""]) == Person(name=None, age=None)
@@ -228,7 +243,7 @@ def test_metric_list_format_with_empty_string() -> None:
     )
 
 
-def test_metric_list_parse_with_None() -> None:
+def test_metric_list_parse_with_none() -> None:
     assert ListPerson.parse(fields=[",Sally", "40, 30"]) == ListPerson(
         name=[None, "Sally"], age=[40, 30]
     )