From c9ed80d6702f1c0096c1842e28c99820bab300e9 Mon Sep 17 00:00:00 2001 From: sambles Date: Mon, 5 Aug 2024 10:39:05 +0100 Subject: [PATCH] Fix allow blank errors with OED v4 spec (#128) * Fix allow blank errors with OED v4 * pep --- ods_tools/oed/source.py | 2 +- ods_tools/oed/validator.py | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/ods_tools/oed/source.py b/ods_tools/oed/source.py index a5fa528..ac69329 100644 --- a/ods_tools/oed/source.py +++ b/ods_tools/oed/source.py @@ -309,7 +309,7 @@ def prepare_df(cls, df, column_to_field, ods_fields): for field_info in ods_fields.values(): col = field_info['Input Field Name'] if col not in present_field: - if field_info.get('Required Field') == 'R' and field_info.get("Allow blanks?").upper() == "YES": + if field_info.get('Required Field') == 'R' and field_info.get("Allow blanks?", '').upper() == "YES": if field_info['pd_dtype'] == 'category': df[col] = '' if field_info['Default'] == 'n/a' else field_info['Default'] df[col] = df[col].astype('category') diff --git a/ods_tools/oed/validator.py b/ods_tools/oed/validator.py index 40fa318..ba26221 100644 --- a/ods_tools/oed/validator.py +++ b/ods_tools/oed/validator.py @@ -126,7 +126,9 @@ def check_required_fields(self): for field_info in input_fields.values(): if field_info['Input Field Name'] not in field_to_columns: - if field_info.get('Required Field') == 'R': + # OED v4 = 'Property field status' and OED v3 = 'Required Field' + requ_field_ref = 'Property field status' if 'Property field status' in field_info else 'Required Field' + if field_info.get(requ_field_ref) == 'R': invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source, 'msg': f"missing required column {field_info['Input Field Name']}"}) continue @@ -134,7 +136,11 @@ def check_required_fields(self): if isinstance(columns, str): columns = [columns] for column in columns: - if field_info.get("Allow blanks?").upper() == 'NO': + blanks_not_allowed = any([ + field_info.get("Allow blanks?", '').upper() == 'NO', # OED v3 + field_info.get('Property field status', '').upper() == 'R' # OED v4 + ]) + if blanks_not_allowed: missing_value_df = oed_source.dataframe[is_empty(oed_source.dataframe, column)] if not missing_value_df.empty: invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source, @@ -170,10 +176,15 @@ def check_valid_values(self): identifier_field = self.identifier_field_maps[oed_source] for column, field_info in column_to_field.items(): valid_ranges = field_info['Valid value range'] + blanks_allowed = any([ + field_info.get('Allow blanks?', '').lower() == 'yes', # OED v3 + field_info.get('Property field status', '').upper() != 'R', # OED v4 + ]) + if valid_ranges != 'n/a': is_valid_value = functools.partial(OedSchema.is_valid_value, valid_ranges=valid_ranges, - allow_blanks=field_info['Allow blanks?'].lower() == 'yes') + allow_blanks=blanks_allowed) invalid_range_data = oed_source.dataframe[~oed_source.dataframe[column].apply(is_valid_value)] if not invalid_range_data.empty: invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,