Skip to content

Commit

Permalink
Improve unknown value error message (#388)
Browse files Browse the repository at this point in the history
* Move log_error to error.py

* Add project to DataStructureDefinition

* Use project information for error logging

* Adjust tests
  • Loading branch information
phackstock authored Sep 16, 2024
1 parent 71ffcab commit 7114796
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 22 deletions.
7 changes: 0 additions & 7 deletions nomenclature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@

import yaml


def log_error(dimension, error_list):
    """Log the items that are missing from the codelist of a dimension.

    A header line naming *dimension* is followed by one bullet per entry of
    *error_list* (each converted with ``str``); the combined message is
    emitted at ERROR level via the root logger.
    """
    header = (
        f"The following {dimension}(s) are not defined in the {dimension} codelist:"
    )
    bullet_lines = [str(entry) for entry in [header] + error_list]
    logging.error("\n - ".join(bullet_lines))


from nomenclature.cli import cli # noqa
from nomenclature.codelist import CodeList # noqa
from nomenclature.core import process # noqa
Expand Down
40 changes: 31 additions & 9 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
from pydantic_core import PydanticCustomError

import nomenclature
from nomenclature import log_error
from nomenclature.code import Code, MetaCode, RegionCode, VariableCode
from nomenclature.config import CodeListConfig, NomenclatureConfig
from nomenclature.error import ErrorCollector, custom_pydantic_errors
from nomenclature.error import ErrorCollector, custom_pydantic_errors, log_error

here = Path(__file__).parent.absolute()

Expand Down Expand Up @@ -98,9 +97,14 @@ def keys(self):
def values(self):
return self.mapping.values()

def validate_data(self, df: IamDataFrame, dimension: str) -> bool:
def validate_data(
self,
df: IamDataFrame,
dimension: str,
project: str | None = None,
) -> bool:
if invalid := self.validate_items(getattr(df, dimension)):
log_error(dimension, invalid)
log_error(dimension, invalid, project)
return False
return True

Expand Down Expand Up @@ -600,7 +604,11 @@ def vars_kwargs(self, variables: List[str]) -> List[VariableCode]:
if self[var].agg_kwargs and not self[var].skip_region_aggregation
]

def validate_units(self, unit_mapping) -> bool:
def validate_units(
self,
unit_mapping,
project: None | str = None,
) -> bool:
if invalid_units := [
(variable, unit, self.mapping[variable].unit)
for variable, unit in unit_mapping.items()
Expand All @@ -613,14 +621,28 @@ def validate_units(self, unit_mapping) -> bool:
for v, u, e in invalid_units
]
msg = "The following variable(s) are reported with the wrong unit:"
logging.error("\n - ".join([msg] + lst))
file_service_address = "https://files.ece.iiasa.ac.at"
logging.error(
"\n - ".join([msg] + lst)
+ (
f"\n\nPlease refer to {file_service_address}/{project}/"
f"{project}-template.xlsx for the list of allowed units."
if project is not None
else ""
)
)
return False
return True

def validate_data(self, df: IamDataFrame, dimension: str) -> bool:
def validate_data(
self,
df: IamDataFrame,
dimension: str,
project: str | None = None,
) -> bool:
# validate variables
all_variables_valid = super().validate_data(df, dimension)
all_units_valid = self.validate_units(df.unit_mapping)
all_variables_valid = super().validate_data(df, dimension, project)
all_units_valid = self.validate_units(df.unit_mapping, project)
return all_variables_valid and all_units_valid

def list_missing_variables(
Expand Down
8 changes: 7 additions & 1 deletion nomenclature/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def __init__(self, path, dimensions=None):
path = Path(path)

self.project_folder = path.parent
self.project = self.project_folder.name.split("-workflow")[0]

if (file := self.project_folder / "nomenclature.yaml").exists():
self.config = NomenclatureConfig.from_file(file=file)
Expand Down Expand Up @@ -98,7 +99,12 @@ def validate(self, df: IamDataFrame, dimensions: list | None = None) -> None:
"""

if any(
getattr(self, dimension).validate_data(df, dimension) is False
getattr(self, dimension).validate_data(
df,
dimension,
self.project,
)
is False
for dimension in (dimensions or self.dimensions)
):
raise ValueError("The validation failed. Please check the log for details.")
Expand Down
21 changes: 21 additions & 0 deletions nomenclature/error.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import textwrap
from collections import namedtuple
from typing import Optional
Expand Down Expand Up @@ -71,3 +72,23 @@ def __repr__(self) -> str:

def __bool__(self) -> bool:
return bool(self.errors)


def log_error(
dimension: str,
error_list,
project: str | None = None,
) -> None:
"""Compile an error message and write to log"""
file_service_address = "https://files.ece.iiasa.ac.at"
msg = f"The following {dimension}(s) are not defined in the {dimension} codelist:"

logging.error(
"\n - ".join(map(str, [msg] + error_list))
+ (
f"\n\nPlease refer to {file_service_address}/{project}/{project}"
f"-template.xlsx for the list of allowed {dimension}s."
if project is not None
else ""
)
)
3 changes: 1 addition & 2 deletions nomenclature/processor/region.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,9 @@
from pydantic.types import DirectoryPath, FilePath
from pydantic_core import PydanticCustomError

from nomenclature import log_error
from nomenclature.codelist import RegionCodeList, VariableCodeList
from nomenclature.definition import DataStructureDefinition
from nomenclature.error import custom_pydantic_errors, ErrorCollector
from nomenclature.error import custom_pydantic_errors, ErrorCollector, log_error
from nomenclature.processor import Processor
from nomenclature.processor.utils import get_relative_path

Expand Down
18 changes: 15 additions & 3 deletions tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,28 +28,40 @@ def test_validation_brackets(extras_definition, simple_df):
extras_definition.validate(simple_df)


def test_validation_fails_variable(simple_definition, simple_df):
def test_validation_fails_variable(simple_definition, simple_df, caplog):
"""Changing a variable name raises"""
simple_df.rename(variable={"Primary Energy": "foo"}, inplace=True)

with pytest.raises(ValueError, match=MATCH_FAIL_VALIDATION):
simple_definition.validate(simple_df)
assert (
"Please refer to https://files.ece.iiasa.ac.at/data/data-template.xlsx"
" for the list of allowed variables." in caplog.text
)


def test_validation_fails_unit(simple_definition, simple_df):
def test_validation_fails_unit(simple_definition, simple_df, caplog):
"""Changing a unit raises"""
simple_df.rename(unit={"EJ/yr": "GWh/yr"}, inplace=True)

with pytest.raises(ValueError, match=MATCH_FAIL_VALIDATION):
simple_definition.validate(simple_df)
assert (
"Please refer to https://files.ece.iiasa.ac.at/data/data-template.xlsx"
" for the list of allowed units." in caplog.text
)


def test_validation_fails_region(simple_definition, simple_df):
def test_validation_fails_region(simple_definition, simple_df, caplog):
"""Changing a region name raises"""
simple_df.rename(region={"World": "foo"}, inplace=True)

with pytest.raises(ValueError, match=MATCH_FAIL_VALIDATION):
simple_definition.validate(simple_df)
assert (
"Please refer to https://files.ece.iiasa.ac.at/data/data-template.xlsx"
" for the list of allowed regions." in caplog.text
)


def test_validation_fails_region_as_int(simple_definition, simple_df):
Expand Down

0 comments on commit 7114796

Please sign in to comment.