Skip to content

Commit

Permalink
Merge pull request #116 from OasisLMF/release/3.1.5
Browse files Browse the repository at this point in the history
Release 3.1.5 (30th May 2024)
  • Loading branch information
sambles authored May 30, 2024
2 parents 3c9a3a2 + 72fa3ee commit 9b72eb5
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 25 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
ODS_Tools Changelog
===================

`3.1.5`_
---------
* [#80](https://github.com/OasisLMF/ODS_Tools/pull/80) - Added fields to analysis settings schema
* [#115](https://github.com/OasisLMF/ODS_Tools/pull/115) - Fix issue when a categorical column has a default value
* [#117](https://github.com/OasisLMF/ODS_Tools/pull/117) - Fix/fill empty
* [#78](https://github.com/OasisLMF/ODS_Tools/pull/78) - Release 3.1.4
.. _`3.1.5`: https://github.com/OasisLMF/ODS_Tools/compare/3.1.4...3.1.5

.. _`3.1.4`: https://github.com/OasisLMF/ODS_Tools/compare/3.1.3...3.1.4

`3.1.3`_
Expand Down
2 changes: 1 addition & 1 deletion ods_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '3.1.4'
__version__ = '3.1.5'

import logging

Expand Down
4 changes: 2 additions & 2 deletions ods_tools/oed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
from .common import (
OdsException, PANDAS_COMPRESSION_MAP, PANDAS_DEFAULT_NULL_VALUES, USUAL_FILE_NAME, OED_TYPE_TO_NAME,
OED_NAME_TO_TYPE, OED_IDENTIFIER_FIELDS, VALIDATOR_ON_ERROR_ACTION, DEFAULT_VALIDATION_CONFIG, OED_PERIL_COLUMNS, fill_empty,
UnknownColumnSaveOption, BLANK_VALUES
UnknownColumnSaveOption, BLANK_VALUES, is_empty
)


__all__ = [
'OedExposure', 'OedSchema', 'OedSource', 'ModelSettingSchema', 'AnalysisSettingSchema',
'OdsException', 'PANDAS_COMPRESSION_MAP', 'PANDAS_DEFAULT_NULL_VALUES', 'USUAL_FILE_NAME', 'OED_TYPE_TO_NAME',
'OED_NAME_TO_TYPE', 'OED_IDENTIFIER_FIELDS', 'VALIDATOR_ON_ERROR_ACTION', 'DEFAULT_VALIDATION_CONFIG', 'OED_PERIL_COLUMNS', 'fill_empty',
'UnknownColumnSaveOption', 'BLANK_VALUES'
'UnknownColumnSaveOption', 'BLANK_VALUES', 'is_empty'
]
15 changes: 14 additions & 1 deletion ods_tools/oed/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,27 @@ def __get__(self, obj, type=None):

BLANK_VALUES = {np.nan, '', None, pd.NA, pd.NaT}

# Lookup table from a pandas dtype name to a callable that converts a raw
# value into the matching plain Python type. It is indexed with a field's
# 'pd_dtype' entry when a schema default value has to be converted.
# NOTE(review): a dtype name that is not listed here raises KeyError at
# lookup time - confirm the schema only uses the names below.
dtype_to_python = {
    'Int8': int,
    'Int32': int,
    'Int64': int,
    'bytes': lambda x: bytes(x, 'utf-8'),  # encode the text value as UTF-8 bytes
    'float64': float,
    'category': str
}


def is_empty(df, columns):
    """Return a boolean mask flagging the blank entries of ``df[columns]``.

    An entry is considered blank when pandas reports it as null or when it
    compares equal to the empty string.

    Args:
        df: pandas DataFrame to inspect.
        columns: column label (or list of labels) used to index ``df``.

    Returns:
        Boolean pandas object with the same shape as ``df[columns]``.
    """
    selection = df[columns]
    null_mask = selection.isnull()
    empty_string_mask = selection == ''
    return null_mask | empty_string_mask


def fill_empty(df, columns, value):
    """Replace the blank entries of the given columns with ``value``, in place.

    Blank entries are the ones flagged by ``is_empty`` (null or empty string).

    Args:
        df: pandas DataFrame to modify in place.
        columns: a single column name or an iterable of column names.
        value: replacement value written into every blank entry.
    """
    if isinstance(columns, str):
        columns = [columns]
    for column in columns:
        if df[column].dtype.name == 'category':
            # A categorical column only accepts values that are declared
            # categories. Null-like values (None, any NaN, pd.NA, pd.NaT) are
            # always accepted and must never be registered as a category.
            value_is_null = value is None or pd.isna(value)
            if not value_is_null and value not in df[column].cat.categories:
                df[column] = df[column].cat.add_categories(value)
        # Single fill based on is_empty; the former isin(BLANK_VALUES) pass
        # selected a subset of the same rows and was redundant.
        df.loc[is_empty(df, column), column] = value


class UnknownColumnSaveOption(Enum):
Expand Down
21 changes: 20 additions & 1 deletion ods_tools/oed/oed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numba as nb
import numpy as np

from .common import OdsException, BLANK_VALUES, cached_property
from .common import OdsException, BLANK_VALUES, cached_property, dtype_to_python

ENV_ODS_SCHEMA_PATH = os.getenv('ODS_SCHEMA_PATH')

Expand Down Expand Up @@ -121,6 +121,25 @@ def peril_filtering(self, peril_ids, peril_filters):
"""
return jit_peril_filtering(peril_ids.to_numpy().astype('str'), peril_filters.to_numpy().astype('str'), self.nb_perils_dict)

@staticmethod
def get_default_from_ods_fields(ods_fields, field_name):
field_info = ods_fields.get(field_name.lower())
if field_info is None:
return ''
if field_info['pd_dtype'] == 'category':
if field_info['Default'] != 'n/a':
return field_info['Default']
else:
return ''
else:
if field_info['Default'] != 'n/a':
return dtype_to_python[field_info['pd_dtype']](field_info['Default'])
else:
return np.nan

def get_default(self, field_name, oed_type='null'):
    """Return the default value of ``field_name`` from this schema.

    Args:
        field_name: name of the field to look up.
        oed_type: key selecting which entry of ``self.schema['input_fields']``
            is searched; defaults to 'null'.

    Returns:
        Whatever ``get_default_from_ods_fields`` returns for the selected
        field definitions and ``field_name``.
    """
    fields_for_type = self.schema['input_fields'][oed_type]
    return self.get_default_from_ods_fields(fields_for_type, field_name)

@staticmethod
def to_universal_field_name(column: str):
"""
Expand Down
13 changes: 4 additions & 9 deletions ods_tools/oed/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import numpy as np
from chardet.universaldetector import UniversalDetector

from .common import (OED_TYPE_TO_NAME, OdsException, PANDAS_COMPRESSION_MAP, PANDAS_DEFAULT_NULL_VALUES, is_relative, BLANK_VALUES, fill_empty,
UnknownColumnSaveOption, cached_property)
from .common import (OED_TYPE_TO_NAME, OdsException, PANDAS_COMPRESSION_MAP, PANDAS_DEFAULT_NULL_VALUES, is_relative, fill_empty,
UnknownColumnSaveOption, cached_property, is_empty)
from .forex import convert_currency
from .oed_schema import OedSchema

Expand Down Expand Up @@ -267,7 +267,7 @@ def as_oed_type(cls, oed_df, column_to_field):
if oed_df[column].dtype.name == 'category' and '' not in oed_df[column].dtype.categories:
oed_df[column] = oed_df[column].cat.add_categories('')
oed_df[column] = oed_df[column] # make a copy f the col in case it is read_only
oed_df.loc[oed_df[column].isin(BLANK_VALUES), column] = ''
oed_df.loc[is_empty(oed_df, column), column] = ''
elif pd_dtype[column].startswith('Int'):
to_tmp_dtype[column] = 'float'

Expand All @@ -287,12 +287,7 @@ def prepare_df(cls, df, column_to_field, ods_fields):
"""
# set default values
for col, field_info in column_to_field.items():
if (field_info
and field_info['Default'] != 'n/a'
and (df[col].isna().any() or (field_info['pd_dtype'] == 'category' and df[col].isnull().any()))):
fill_empty(df, col, df[col].dtype.type(field_info['Default']))
elif df[col].dtype.name == 'category':
fill_empty(df, col, '')
fill_empty(df, col, OedSchema.get_default_from_ods_fields(ods_fields, col))

# add required columns that allow blank values if missing
present_field = set(field_info['Input Field Name'] for field_info in column_to_field.values())
Expand Down
25 changes: 15 additions & 10 deletions ods_tools/oed/validator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import functools
import json
import numpy as np
import logging

from pathlib import Path
from collections.abc import Iterable

from .common import (OdsException, OED_PERIL_COLUMNS, OED_IDENTIFIER_FIELDS, DEFAULT_VALIDATION_CONFIG,
VALIDATOR_ON_ERROR_ACTION, BLANK_VALUES)
VALIDATOR_ON_ERROR_ACTION, BLANK_VALUES, is_empty)
from .oed_schema import OedSchema

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -134,7 +135,7 @@ def check_required_fields(self):
columns = [columns]
for column in columns:
if field_info.get("Allow blanks?").upper() == 'NO':
missing_value_df = oed_source.dataframe[oed_source.dataframe[column].isin(BLANK_VALUES)]
missing_value_df = oed_source.dataframe[is_empty(oed_source.dataframe, column)]
if not missing_value_df.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"column '{column}' has missing values in \n"
Expand Down Expand Up @@ -222,8 +223,9 @@ def check_occupancy_code(self):
if occupancy_code_column is None:
continue
identifier_field = self.identifier_field_maps[oed_source]
invalid_occupancy_code = oed_source.dataframe[~oed_source.dataframe[occupancy_code_column].astype(str).isin(
set(self.exposure.oed_schema.schema['occupancy']) | BLANK_VALUES)]
invalid_occupancy_code = oed_source.dataframe[~(np.isin(oed_source.dataframe[occupancy_code_column].astype(str),
list(self.exposure.oed_schema.schema['occupancy']))
| is_empty(oed_source.dataframe, occupancy_code_column))]
if not invalid_occupancy_code.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"invalid OccupancyCode.\n"
Expand All @@ -242,8 +244,9 @@ def check_construction_code(self):
if construction_code_column is None:
continue
identifier_field = self.identifier_field_maps[oed_source]
invalid_construction_code = oed_source.dataframe[~oed_source.dataframe[construction_code_column].astype(str).isin(
set(self.exposure.oed_schema.schema['construction']) | BLANK_VALUES)]
invalid_construction_code = oed_source.dataframe[~(np.isin(oed_source.dataframe[construction_code_column].astype(str),
list(self.exposure.oed_schema.schema['construction']))
| is_empty(oed_source.dataframe, construction_code_column))]
if not invalid_construction_code.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"invalid ConstructionCode.\n"
Expand All @@ -265,8 +268,9 @@ def check_country_and_area_code(self):
identifier_field = self.identifier_field_maps[oed_source]
area_code_column = self.field_to_column_maps[oed_source].get('AreaCode')
if area_code_column is not None:
country_only_df = oed_source.dataframe[oed_source.dataframe[area_code_column].isin(BLANK_VALUES)]
country_area_df = oed_source.dataframe[~oed_source.dataframe[area_code_column].isin(BLANK_VALUES)]
country_only_df = oed_source.dataframe[is_empty(oed_source.dataframe, area_code_column)]
country_area_df = oed_source.dataframe[~is_empty(oed_source.dataframe, area_code_column)]

invalid_country_area = (country_area_df[
~(country_area_df[[country_code_column, area_code_column]]
.apply(tuple, axis=1)
Expand All @@ -279,8 +283,9 @@ def check_country_and_area_code(self):
f"{invalid_country_area[identifier_field + [country_code_column, area_code_column]]}"})
else:
country_only_df = oed_source.dataframe
invalid_country = (country_only_df[~country_only_df[country_code_column]
.isin(set(self.exposure.oed_schema.schema['country']) | BLANK_VALUES)])
invalid_country = (country_only_df[~(np.isin(country_only_df[country_code_column],
list(self.exposure.oed_schema.schema['country']))
| is_empty(country_only_df, country_code_column))])
if not invalid_country.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"invalid CountryCode.\n"
Expand Down
24 changes: 23 additions & 1 deletion tests/test_ods_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,24 @@ def test_load_oed_from_config(self):
exposure2 = OedExposure(**config)
self.assertTrue(exposure.location.dataframe.equals(exposure2.location.dataframe))

def test_categorical_with_default(self):
    # UseReinsDates is a string column with a non null default: blank the
    # whole column, write the frame to disk and check it can be loaded back.
    with tempfile.TemporaryDirectory() as work_dir:
        ri_info_path = os.path.join(work_dir, 'ri_info.csv')

        source_config = {
            'ri_info': base_url + '/SourceReinsInfoOEDPiWind.csv',
            'use_field': True
        }
        source_exposure = OedExposure(**source_config)
        source_exposure.ri_info.dataframe['UseReinsDates'] = None
        source_exposure.ri_info.dataframe.to_csv(ri_info_path, index=False)

        # reload from the file whose UseReinsDates column is entirely blank
        reloaded_config = {
            'ri_info': ri_info_path,
            'use_field': True
        }
        reloaded_exposure = OedExposure(**reloaded_config)
        reloaded_df = reloaded_exposure.ri_info.dataframe
        self.assertTrue(isinstance(reloaded_df, pd.DataFrame))

def test_load_oed_from_df(self):
location_df = pd.DataFrame({
'PortNumber': [1, 1],
Expand Down Expand Up @@ -387,7 +405,11 @@ def test_field_required_allow_blank_are_set_to_default(self):
'use_field': True})

original_exposure.location.dataframe.drop(columns=['ContentsTIV'], inplace=True)
original_exposure.location.dataframe['BITIV'] = np.nan
original_exposure.location.dataframe['BITIV'] = pd.NA
original_exposure.location.dataframe.loc[[1], 'BITIV'] = np.nan
original_exposure.location.dataframe.loc[[2], 'BITIV'] = None
original_exposure.location.dataframe.loc[[3], 'BITIV'] = pd.NaT
original_exposure.location.dataframe.loc[[4], 'BITIV'] = ''
original_exposure.ri_info.dataframe.drop(columns='RiskLevel', inplace=True)

with tempfile.TemporaryDirectory() as tmp_dir:
Expand Down

0 comments on commit 9b72eb5

Please sign in to comment.