Skip to content

Commit

Permalink
Merge pull request #110 from OasisLMF/release/3.2.4
Browse files Browse the repository at this point in the history
Release 3.2.4 (30th May 2024)
  • Loading branch information
sambles authored May 30, 2024
2 parents d142d4f + 5e05d8f commit 6fb2a9d
Show file tree
Hide file tree
Showing 13 changed files with 127 additions and 39 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
ODS_Tools Changelog
===================

`3.2.4`_
---------
* [#103](https://github.com/OasisLMF/ODS_Tools/pull/103) - Release 3.2.3
* [#107](https://github.com/OasisLMF/ODS_Tools/pull/108) - update odtf mappings
* [#109](https://github.com/OasisLMF/ODS_Tools/pull/109) - Update OED spec to 3.2.0
* [#3](https://github.com/OasisLMF/ODS_Tools/pull/111) - model schema ord metrics
* [#112](https://github.com/OasisLMF/ODS_Tools/pull/113) - Update analysis settings schema to allow more than 9 summary groups
* [#115](https://github.com/OasisLMF/ODS_Tools/pull/115) - fix issue when categorical columns have a default value
* [#117](https://github.com/OasisLMF/ODS_Tools/pull/117) - Fix/fill empty
.. _`3.2.4`: https://github.com/OasisLMF/ODS_Tools/compare/3.2.3...3.2.4

`3.2.3`_
---------
* [#98](https://github.com/OasisLMF/ODS_Tools/pull/104) - odtf outstanding issues
Expand Down
2 changes: 1 addition & 1 deletion ods_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '3.2.3'
__version__ = '3.2.4'

import logging

Expand Down
3 changes: 1 addition & 2 deletions ods_tools/data/analysis_settings_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
"multipleOf": 1,
"title": "Summary ID",
"description": "Identifier for the summary set.",
"minimum": 1,
"maximum": 9
"minimum": 1
},
"oed_fields": {
"type": "array",
Expand Down
3 changes: 0 additions & 3 deletions ods_tools/data/model_settings_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,6 @@
"wheatsheaf_mean_aep",
"wheatsheaf_mean_oep",
"wheatsheaf_oep",

"ord_output",
"elt_sample",
"elt_quantile",
"elt_moment",
Expand Down Expand Up @@ -514,7 +512,6 @@
"wheatsheaf_mean_aep",
"wheatsheaf_mean_oep",
"wheatsheaf_oep",
"ord_output",
"elt_sample",
"elt_quantile",
"elt_moment",
Expand Down
37 changes: 31 additions & 6 deletions ods_tools/odtf/data/mappings/mapping_loc_Cede-OED.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ forward:
type: int
City:
type: string
Cladding:
type: int
ColdFormedTube:
type: int
ColumnBasement:
Expand Down Expand Up @@ -327,6 +329,8 @@ forward:
#type: string
WallSiding:
type: int
WallSidingCode:
type: int
WallType:
type: int
WaterHeater:
Expand Down Expand Up @@ -436,6 +440,8 @@ forward:
- transformation: Chimney
City:
- transformation: City
Cladding:
- transformation: WallSidingCode
CondTag:
- transformation: SublimitArea
ConstructionQuality:
Expand Down Expand Up @@ -954,8 +960,6 @@ forward:
- transformation: UserGeocodeMatchLevel
FlexiLocWallCode:
- transformation: WallType
FlexiLocWallSidingCode:
- transformation: WallSiding
FlexiLocWaterHeaterCode:
- transformation: WaterHeater
FlexiLocWeldingCode:
Expand Down Expand Up @@ -1555,6 +1559,8 @@ reverse:
type: int
City:
type: string
Cladding:
type: int
CondTag:
type: string
ConstructionQuality:
Expand Down Expand Up @@ -1645,8 +1651,6 @@ reverse:
type: string
FlexiLocWallCode:
type: string
FlexiLocWallSidingCode:
type: string
FlexiLocWaterHeaterCode:
type: string
FlexiLocWeldingCode:
Expand Down Expand Up @@ -2609,6 +2613,29 @@ reverse:
- transformation: Chimney
City:
- transformation: City
# Cladding is derived from WallSidingCode: values '0'-'7' are kept as-is,
# values '8'-'17' are all replaced by '0'.
Cladding:
  - transformation: |
      replace(
        WallSidingCode,
        '0','0',
        '1','1',
        '2','2',
        '3','3',
        '4','4',
        '5','5',
        '6','6',
        '7','7',
        '8','0',
        '9','0',
        '10','0',
        '11','0',
        '12','0',
        '13','0',
        '14','0',
        '15','0',
        '16','0',
        '17','0'
      )
ColdFormedTubeCode:
- transformation: FlexiLocColdFormedTubeCode
ColumnBasementCode:
Expand Down Expand Up @@ -3587,8 +3614,6 @@ reverse:
- transformation: WindowProtection
WallCode:
- transformation: FlexiLocWallCode
WallSidingCode:
- transformation: FlexiLocWallSidingCode
WaterHeaterCode:
- transformation: FlexiLocWaterHeaterCode
WeldingCode:
Expand Down
4 changes: 2 additions & 2 deletions ods_tools/oed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
from .common import (
OdsException, PANDAS_COMPRESSION_MAP, PANDAS_DEFAULT_NULL_VALUES, USUAL_FILE_NAME, OED_TYPE_TO_NAME,
OED_NAME_TO_TYPE, OED_IDENTIFIER_FIELDS, VALIDATOR_ON_ERROR_ACTION, DEFAULT_VALIDATION_CONFIG, OED_PERIL_COLUMNS, fill_empty,
UnknownColumnSaveOption, BLANK_VALUES
UnknownColumnSaveOption, BLANK_VALUES, is_empty
)


__all__ = [
'OedExposure', 'OedSchema', 'OedSource', 'ModelSettingSchema', 'AnalysisSettingSchema',
'OdsException', 'PANDAS_COMPRESSION_MAP', 'PANDAS_DEFAULT_NULL_VALUES', 'USUAL_FILE_NAME', 'OED_TYPE_TO_NAME',
'OED_NAME_TO_TYPE', 'OED_IDENTIFIER_FIELDS', 'VALIDATOR_ON_ERROR_ACTION', 'DEFAULT_VALIDATION_CONFIG', 'OED_PERIL_COLUMNS', 'fill_empty',
'UnknownColumnSaveOption', 'BLANK_VALUES'
'UnknownColumnSaveOption', 'BLANK_VALUES', 'is_empty'
]
15 changes: 14 additions & 1 deletion ods_tools/oed/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,19 @@ def __get__(self, obj, type=None):

BLANK_VALUES = {np.nan, '', None, pd.NA, pd.NaT}

# Lookup table keyed by pandas dtype name; each value is a callable that
# converts a raw value into the corresponding plain Python type.
dtype_to_python = {
    'Int8': int,
    'Int32': int,
    'Int64': int,
    # bytes() needs an explicit encoding when given a str; UTF-8 is used here.
    'bytes': lambda x: bytes(x, 'utf-8'),
    'float64': float,
    'category': str
}


def is_empty(df, columns):
    """Return a boolean mask of the entries of ``df[columns]`` that are empty.

    An entry counts as empty when it is null (as reported by ``isnull()``)
    or when it compares equal to the empty string ``''``.

    ``columns`` is passed straight to ``df[...]``, so the result has the
    same shape as that selection.
    """
    selection = df[columns]
    null_mask = selection.isnull()
    blank_mask = selection == ''
    return null_mask | blank_mask


def fill_empty(df, columns, value):
if isinstance(columns, str):
Expand All @@ -151,7 +164,7 @@ def fill_empty(df, columns, value):
dtype = getattr(df[column], "dtypes", getattr(df[column], "dtype", None))
if dtype.name == 'category' and value not in {None, np.nan}.union(df[column].cat.categories):
df[column] = df[column].cat.add_categories(value)
df.loc[df[column].isin(BLANK_VALUES), column] = value
df.loc[is_empty(df, column), column] = value


class UnknownColumnSaveOption(Enum):
Expand Down
3 changes: 2 additions & 1 deletion ods_tools/oed/exposure.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import json
from copy import deepcopy
import logging
import numpy as np
from packaging import version
from pathlib import Path

Expand Down Expand Up @@ -80,7 +81,7 @@ def fn(df):
if column not in df.columns or not values:
return df

return df[df[column].isin(values)]
return df[np.isin(df[column], values)]
return fn

loc_filters = [
Expand Down
21 changes: 20 additions & 1 deletion ods_tools/oed/oed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numba as nb
import numpy as np

from .common import OdsException, BLANK_VALUES, cached_property
from .common import OdsException, BLANK_VALUES, cached_property, dtype_to_python

ENV_ODS_SCHEMA_PATH = os.getenv('ODS_SCHEMA_PATH')

Expand Down Expand Up @@ -139,6 +139,25 @@ def peril_filtering(self, peril_ids, peril_filters, include_sub_group=True):
return jit_peril_filtering(peril_ids.to_numpy().astype('str'), peril_filters.to_numpy().astype('str'),
self.nb_peril_groups_dict if include_sub_group else self.nb_perils_dict)

@staticmethod
def get_default_from_ods_fields(ods_fields, field_name):
field_info = ods_fields.get(field_name.lower())
if field_info is None:
return ''
if field_info['pd_dtype'] == 'category':
if field_info['Default'] != 'n/a':
return field_info['Default']
else:
return ''
else:
if field_info['Default'] != 'n/a':
return dtype_to_python[field_info['pd_dtype']](field_info['Default'])
else:
return np.nan

def get_default(self, field_name, oed_type='null'):
    """Return the default fill value of ``field_name`` for ``oed_type``.

    The field definitions are read from
    ``self.schema['input_fields'][oed_type]`` and the lookup itself is
    delegated to ``get_default_from_ods_fields``.
    """
    fields_for_type = self.schema['input_fields'][oed_type]
    return self.get_default_from_ods_fields(fields_for_type, field_name)

@staticmethod
def to_universal_field_name(column: str):
"""
Expand Down
13 changes: 4 additions & 9 deletions ods_tools/oed/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import numpy as np
from chardet.universaldetector import UniversalDetector

from .common import (OED_TYPE_TO_NAME, OdsException, PANDAS_COMPRESSION_MAP, PANDAS_DEFAULT_NULL_VALUES, is_relative, BLANK_VALUES, fill_empty,
UnknownColumnSaveOption, cached_property)
from .common import (OED_TYPE_TO_NAME, OdsException, PANDAS_COMPRESSION_MAP, PANDAS_DEFAULT_NULL_VALUES, is_relative, fill_empty,
UnknownColumnSaveOption, cached_property, is_empty)
from .forex import convert_currency
from .oed_schema import OedSchema

Expand Down Expand Up @@ -282,7 +282,7 @@ def as_oed_type(cls, oed_df, column_to_field):
if oed_df[column].dtype.name == 'category' and '' not in oed_df[column].dtype.categories:
oed_df[column] = oed_df[column].cat.add_categories('')
oed_df[column] = oed_df[column] # make a copy of the col in case it is read_only
oed_df.loc[oed_df[column].isin(BLANK_VALUES), column] = ''
oed_df.loc[is_empty(oed_df, column), column] = ''
elif pd_dtype[column].startswith('Int'):
to_tmp_dtype[column] = 'float'

Expand All @@ -302,12 +302,7 @@ def prepare_df(cls, df, column_to_field, ods_fields):
"""
# set default values
for col, field_info in column_to_field.items():
if (field_info
and field_info['Default'] != 'n/a'
and (df[col].isna().any() or (field_info['pd_dtype'] == 'category' and df[col].isnull().any()))):
fill_empty(df, col, df[col].dtype.type(field_info['Default']))
elif df[col].dtype.name == 'category':
fill_empty(df, col, '')
fill_empty(df, col, OedSchema.get_default_from_ods_fields(ods_fields, col))

# add required columns that allow blank values if missing
present_field = set(field_info['Input Field Name'] for field_info in column_to_field.values())
Expand Down
25 changes: 15 additions & 10 deletions ods_tools/oed/validator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import functools
import json
import numpy as np
import logging

from pathlib import Path
from collections.abc import Iterable

from .common import (OdsException, OED_PERIL_COLUMNS, OED_IDENTIFIER_FIELDS, DEFAULT_VALIDATION_CONFIG,
VALIDATOR_ON_ERROR_ACTION, BLANK_VALUES)
VALIDATOR_ON_ERROR_ACTION, BLANK_VALUES, is_empty)
from .oed_schema import OedSchema

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -134,7 +135,7 @@ def check_required_fields(self):
columns = [columns]
for column in columns:
if field_info.get("Allow blanks?").upper() == 'NO':
missing_value_df = oed_source.dataframe[oed_source.dataframe[column].isin(BLANK_VALUES)]
missing_value_df = oed_source.dataframe[is_empty(oed_source.dataframe, column)]
if not missing_value_df.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"column '{column}' has missing values in \n"
Expand Down Expand Up @@ -222,8 +223,9 @@ def check_occupancy_code(self):
if occupancy_code_column is None:
continue
identifier_field = self.identifier_field_maps[oed_source]
invalid_occupancy_code = oed_source.dataframe[~oed_source.dataframe[occupancy_code_column].astype(str).isin(
set(self.exposure.oed_schema.schema['occupancy']) | BLANK_VALUES)]
invalid_occupancy_code = oed_source.dataframe[~(np.isin(oed_source.dataframe[occupancy_code_column].astype(str),
list(self.exposure.oed_schema.schema['occupancy']))
| is_empty(oed_source.dataframe, occupancy_code_column))]
if not invalid_occupancy_code.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"invalid OccupancyCode.\n"
Expand All @@ -242,8 +244,9 @@ def check_construction_code(self):
if construction_code_column is None:
continue
identifier_field = self.identifier_field_maps[oed_source]
invalid_construction_code = oed_source.dataframe[~oed_source.dataframe[construction_code_column].astype(str).isin(
set(self.exposure.oed_schema.schema['construction']) | BLANK_VALUES)]
invalid_construction_code = oed_source.dataframe[~(np.isin(oed_source.dataframe[construction_code_column].astype(str),
list(self.exposure.oed_schema.schema['construction']))
| is_empty(oed_source.dataframe, construction_code_column))]
if not invalid_construction_code.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"invalid ConstructionCode.\n"
Expand All @@ -265,8 +268,9 @@ def check_country_and_area_code(self):
identifier_field = self.identifier_field_maps[oed_source]
area_code_column = self.field_to_column_maps[oed_source].get('AreaCode')
if area_code_column is not None:
country_only_df = oed_source.dataframe[oed_source.dataframe[area_code_column].isin(BLANK_VALUES)]
country_area_df = oed_source.dataframe[~oed_source.dataframe[area_code_column].isin(BLANK_VALUES)]
country_only_df = oed_source.dataframe[is_empty(oed_source.dataframe, area_code_column)]
country_area_df = oed_source.dataframe[~is_empty(oed_source.dataframe, area_code_column)]

invalid_country_area = (country_area_df[
~(country_area_df[[country_code_column, area_code_column]]
.apply(tuple, axis=1)
Expand All @@ -279,8 +283,9 @@ def check_country_and_area_code(self):
f"{invalid_country_area[identifier_field + [country_code_column, area_code_column]]}"})
else:
country_only_df = oed_source.dataframe
invalid_country = (country_only_df[~country_only_df[country_code_column]
.isin(set(self.exposure.oed_schema.schema['country']) | BLANK_VALUES)])
invalid_country = (country_only_df[~(np.isin(country_only_df[country_code_column],
list(self.exposure.oed_schema.schema['country']))
| is_empty(country_only_df, country_code_column))])
if not invalid_country.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"invalid CountryCode.\n"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))

OED_VERSION = '3.1.0'
OED_VERSION = '3.2.0'
# ORD_VERSION =


Expand Down
Loading

0 comments on commit 6fb2a9d

Please sign in to comment.