Skip to content

Commit

Permalink
feat: add the skip_extra argument to Metric.read
Browse files Browse the repository at this point in the history
This allows the input metric files to have additional columns.

Fixes #49.
  • Loading branch information
nh13 committed Aug 4, 2023
1 parent 0c17fe0 commit a2eb1b7
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 5 deletions.
13 changes: 10 additions & 3 deletions fgpyo/util/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,18 +148,25 @@ def _parsers(cls) -> Dict[type, Callable[[str], Any]]:
return {}

@classmethod
def read(cls, path: Path) -> Iterator[Any]:
def read(cls, path: Path, skip_extra: bool = True) -> Iterator[Any]:
"""Reads in zero or more metrics from the given path.
The metric file must contain a matching header.
Args:
path: the path to the metrics file.
skip_extra: True to ignore any extra columns, False to raise an exception.
"""
parsers = cls._parsers()
with path.open("r") as reader:
header: List[str] = reader.readline().rstrip("\r\n").split("\t")
assert header == cls.header(), "Header did not match"
cls_header = cls.header()
# check the header
for field in cls_header:
assert field in header, f"Missing field '{field}' in file: {path}"
if not skip_extra:
for field in header:
assert field in cls_header, f"Extra field '{field}' in file: {path}"
for line in reader:
fields: List[str] = line.rstrip("\r\n").split("\t")
instance: Metric[MetricType] = inspect.attr_from(
Expand Down Expand Up @@ -208,7 +215,7 @@ def format_value(cls, value: Any) -> str:
`str` to all others.
Dictionaries / mappings will have keys and vals separated by semicolons, and key val pairs
pairs delimited by commas.
delimited by commas.
In addition, lists will be flanked with '[]', tuples with '()' and sets and dictionaries
with '{}'
Expand Down
19 changes: 17 additions & 2 deletions fgpyo/util/tests/test_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,21 @@ def test_metrics_roundtrip(tmpdir: TmpDir) -> None:
assert metrics == DUMMY_METRICS


def test_metrics_read_extra_columns(tmpdir: TmpDir) -> None:
    """Checks that reading a metric file honors `skip_extra` when extra columns are present.

    The file written here has the `Person` columns plus one additional column ("foo").
    Reading must succeed by default and with `skip_extra=True`, and must fail with an
    `AssertionError` naming the extra column when `skip_extra=False`.
    """
    person = Person(name="Max", age=42)
    path = Path(tmpdir) / "metrics.txt"

    # Build a new list rather than appending to whatever `Person.header()` returned, so the
    # test never mutates state owned by the class under test.
    header = [*Person.header(), "foo"]
    with path.open("w") as writer:
        writer.write("\t".join(header) + "\n")
        writer.write(f"{person.name}\t{person.age}\tbar\n")

    # Extra columns are ignored by default and when explicitly requested.
    assert list(Person.read(path=path)) == [person]
    assert list(Person.read(path=path, skip_extra=True)) == [person]

    # Read through `Person` (not the `Metric` base class) so the failure is attributable to
    # the extra column; `list(...)` forces consumption of the returned iterator.
    with pytest.raises(AssertionError, match="Extra field 'foo'"):
        list(Person.read(path=path, skip_extra=False))


def test_metric_header() -> None:
assert DummyMetric.header() == [
"int_value",
Expand Down Expand Up @@ -186,7 +201,7 @@ def test_metric_custom_formatter() -> None:
assert list(person.formatted_values()) == ["john doe", "42"]


def test_metric_parse_with_None() -> None:
def test_metric_parse_with_none() -> None:
assert Person.parse(fields=["", "40"]) == Person(name=None, age=40)
assert Person.parse(fields=["Sally", ""]) == Person(name="Sally", age=None)
assert Person.parse(fields=["", ""]) == Person(name=None, age=None)
Expand Down Expand Up @@ -228,7 +243,7 @@ def test_metric_list_format_with_empty_string() -> None:
)


def test_metric_list_parse_with_None() -> None:
def test_metric_list_parse_with_none() -> None:
assert ListPerson.parse(fields=[",Sally", "40, 30"]) == ListPerson(
name=[None, "Sally"], age=[40, 30]
)
Expand Down

0 comments on commit a2eb1b7

Please sign in to comment.