Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 3.2.4 (30th May 2024) #110

Merged
merged 8 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
ODS_Tools Changelog
===================

`3.2.4`_
---------
* [#103](https://github.com/OasisLMF/ODS_Tools/pull/103) - Release 3.2.3
* [#107](https://github.com/OasisLMF/ODS_Tools/pull/108) - update odtf mappings
* [#109](https://github.com/OasisLMF/ODS_Tools/pull/109) - Update OED spec to 3.2.0
* [#3](https://github.com/OasisLMF/ODS_Tools/pull/111) - model schema ord metrics
* [#112](https://github.com/OasisLMF/ODS_Tools/pull/113) - Update analysis settings schema to allow more than 9 summary groups
* [#115](https://github.com/OasisLMF/ODS_Tools/pull/115) - fix issue when categorical column have a default value
* [#117](https://github.com/OasisLMF/ODS_Tools/pull/117) - Fix/fill empty

.. _`3.2.4`: https://github.com/OasisLMF/ODS_Tools/compare/3.2.3...3.2.4

`3.2.3`_
---------
* [#98](https://github.com/OasisLMF/ODS_Tools/pull/104) - odtf outstanding issues
Expand Down
2 changes: 1 addition & 1 deletion ods_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '3.2.3'
__version__ = '3.2.4'

import logging

Expand Down
3 changes: 1 addition & 2 deletions ods_tools/data/analysis_settings_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
"multipleOf": 1,
"title": "Summary ID",
"description": "Identifier for the summary set.",
"minimum": 1,
"maximum": 9
"minimum": 1
},
"oed_fields": {
"type": "array",
Expand Down
3 changes: 0 additions & 3 deletions ods_tools/data/model_settings_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,6 @@
"wheatsheaf_mean_aep",
"wheatsheaf_mean_oep",
"wheatsheaf_oep",

"ord_output",
"elt_sample",
"elt_quantile",
"elt_moment",
Expand Down Expand Up @@ -514,7 +512,6 @@
"wheatsheaf_mean_aep",
"wheatsheaf_mean_oep",
"wheatsheaf_oep",
"ord_output",
"elt_sample",
"elt_quantile",
"elt_moment",
Expand Down
37 changes: 31 additions & 6 deletions ods_tools/odtf/data/mappings/mapping_loc_Cede-OED.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ forward:
type: int
City:
type: string
Cladding:
type: int
ColdFormedTube:
type: int
ColumnBasement:
Expand Down Expand Up @@ -327,6 +329,8 @@ forward:
#type: string
WallSiding:
type: int
WallSidingCode:
type: int
WallType:
type: int
WaterHeater:
Expand Down Expand Up @@ -436,6 +440,8 @@ forward:
- transformation: Chimney
City:
- transformation: City
Cladding:
- transformation: WallSidingCode
CondTag:
- transformation: SublimitArea
ConstructionQuality:
Expand Down Expand Up @@ -954,8 +960,6 @@ forward:
- transformation: UserGeocodeMatchLevel
FlexiLocWallCode:
- transformation: WallType
FlexiLocWallSidingCode:
- transformation: WallSiding
FlexiLocWaterHeaterCode:
- transformation: WaterHeater
FlexiLocWeldingCode:
Expand Down Expand Up @@ -1555,6 +1559,8 @@ reverse:
type: int
City:
type: string
Cladding:
type: int
CondTag:
type: string
ConstructionQuality:
Expand Down Expand Up @@ -1645,8 +1651,6 @@ reverse:
type: string
FlexiLocWallCode:
type: string
FlexiLocWallSidingCode:
type: string
FlexiLocWaterHeaterCode:
type: string
FlexiLocWeldingCode:
Expand Down Expand Up @@ -2609,6 +2613,29 @@ reverse:
- transformation: Chimney
City:
- transformation: City
Cladding:
  # Derives Cladding from WallSidingCode: codes 0-7 pass through unchanged,
  # codes 8-17 are all collapsed to '0'.
  # NOTE(review): this entry lives in the `reverse` section yet reads
  # WallSidingCode and writes Cladding - confirm the direction is intended.
  - transformation: |
      replace(
        WallSidingCode,
        '0','0',
        '1','1',
        '2','2',
        '3','3',
        '4','4',
        '5','5',
        '6','6',
        '7','7',
        '8','0',
        '9','0',
        '10','0',
        '11','0',
        '12','0',
        '13','0',
        '14','0',
        '15','0',
        '16','0',
        '17','0'
      )
ColdFormedTubeCode:
- transformation: FlexiLocColdFormedTubeCode
ColumnBasementCode:
Expand Down Expand Up @@ -3587,8 +3614,6 @@ reverse:
- transformation: WindowProtection
WallCode:
- transformation: FlexiLocWallCode
WallSidingCode:
- transformation: FlexiLocWallSidingCode
WaterHeaterCode:
- transformation: FlexiLocWaterHeaterCode
WeldingCode:
Expand Down
4 changes: 2 additions & 2 deletions ods_tools/oed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
from .common import (
OdsException, PANDAS_COMPRESSION_MAP, PANDAS_DEFAULT_NULL_VALUES, USUAL_FILE_NAME, OED_TYPE_TO_NAME,
OED_NAME_TO_TYPE, OED_IDENTIFIER_FIELDS, VALIDATOR_ON_ERROR_ACTION, DEFAULT_VALIDATION_CONFIG, OED_PERIL_COLUMNS, fill_empty,
UnknownColumnSaveOption, BLANK_VALUES
UnknownColumnSaveOption, BLANK_VALUES, is_empty
)


__all__ = [
'OedExposure', 'OedSchema', 'OedSource', 'ModelSettingSchema', 'AnalysisSettingSchema',
'OdsException', 'PANDAS_COMPRESSION_MAP', 'PANDAS_DEFAULT_NULL_VALUES', 'USUAL_FILE_NAME', 'OED_TYPE_TO_NAME',
'OED_NAME_TO_TYPE', 'OED_IDENTIFIER_FIELDS', 'VALIDATOR_ON_ERROR_ACTION', 'DEFAULT_VALIDATION_CONFIG', 'OED_PERIL_COLUMNS', 'fill_empty',
'UnknownColumnSaveOption', 'BLANK_VALUES'
'UnknownColumnSaveOption', 'BLANK_VALUES', 'is_empty'
]
15 changes: 14 additions & 1 deletion ods_tools/oed/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,19 @@ def __get__(self, obj, type=None):

BLANK_VALUES = {np.nan, '', None, pd.NA, pd.NaT}

# Converters keyed by pandas dtype name: each callable turns a raw value
# into the plain Python type that corresponds to that dtype.
dtype_to_python = dict(
    Int8=int,
    Int32=int,
    Int64=int,
    bytes=lambda x: bytes(x, 'utf-8'),  # text is encoded as UTF-8
    float64=float,
    category=str,
)


def is_empty(df, columns):
    """Return a boolean mask of the entries in ``df[columns]`` that are blank.

    An entry counts as blank when it is null (per ``isnull``) or equal to
    the empty string.

    Args:
        df: pandas DataFrame to inspect.
        columns: a column label or list of labels used to index ``df``.

    Returns:
        Boolean Series/DataFrame with the same shape as ``df[columns]``.
    """
    selection = df[columns]
    null_mask = selection.isnull()
    blank_mask = selection == ''
    return null_mask | blank_mask


def fill_empty(df, columns, value):
if isinstance(columns, str):
Expand All @@ -151,7 +164,7 @@ def fill_empty(df, columns, value):
dtype = getattr(df[column], "dtypes", getattr(df[column], "dtype", None))
if dtype.name == 'category' and value not in {None, np.nan}.union(df[column].cat.categories):
df[column] = df[column].cat.add_categories(value)
df.loc[df[column].isin(BLANK_VALUES), column] = value
df.loc[is_empty(df, column), column] = value


class UnknownColumnSaveOption(Enum):
Expand Down
3 changes: 2 additions & 1 deletion ods_tools/oed/exposure.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import json
from copy import deepcopy
import logging
import numpy as np
from packaging import version
from pathlib import Path

Expand Down Expand Up @@ -80,7 +81,7 @@ def fn(df):
if column not in df.columns or not values:
return df

return df[df[column].isin(values)]
return df[np.isin(df[column], values)]
return fn

loc_filters = [
Expand Down
21 changes: 20 additions & 1 deletion ods_tools/oed/oed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numba as nb
import numpy as np

from .common import OdsException, BLANK_VALUES, cached_property
from .common import OdsException, BLANK_VALUES, cached_property, dtype_to_python

ENV_ODS_SCHEMA_PATH = os.getenv('ODS_SCHEMA_PATH')

Expand Down Expand Up @@ -139,6 +139,25 @@ def peril_filtering(self, peril_ids, peril_filters, include_sub_group=True):
return jit_peril_filtering(peril_ids.to_numpy().astype('str'), peril_filters.to_numpy().astype('str'),
self.nb_peril_groups_dict if include_sub_group else self.nb_perils_dict)

@staticmethod
def get_default_from_ods_fields(ods_fields, field_name):
field_info = ods_fields.get(field_name.lower())
if field_info is None:
return ''
if field_info['pd_dtype'] == 'category':
if field_info['Default'] != 'n/a':
return field_info['Default']
else:
return ''
else:
if field_info['Default'] != 'n/a':
return dtype_to_python[field_info['pd_dtype']](field_info['Default'])
else:
return np.nan

def get_default(self, field_name, oed_type='null'):
    """Return the default fill value for ``field_name`` from this schema.

    Args:
        field_name: name of the field to look up.
        oed_type: key selecting which group of ``schema['input_fields']``
            is searched (defaults to ``'null'``).

    Returns:
        Whatever ``get_default_from_ods_fields`` returns for that field.
    """
    ods_fields = self.schema['input_fields'][oed_type]
    return self.get_default_from_ods_fields(ods_fields, field_name)

@staticmethod
def to_universal_field_name(column: str):
"""
Expand Down
13 changes: 4 additions & 9 deletions ods_tools/oed/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import numpy as np
from chardet.universaldetector import UniversalDetector

from .common import (OED_TYPE_TO_NAME, OdsException, PANDAS_COMPRESSION_MAP, PANDAS_DEFAULT_NULL_VALUES, is_relative, BLANK_VALUES, fill_empty,
UnknownColumnSaveOption, cached_property)
from .common import (OED_TYPE_TO_NAME, OdsException, PANDAS_COMPRESSION_MAP, PANDAS_DEFAULT_NULL_VALUES, is_relative, fill_empty,
UnknownColumnSaveOption, cached_property, is_empty)
from .forex import convert_currency
from .oed_schema import OedSchema

Expand Down Expand Up @@ -282,7 +282,7 @@ def as_oed_type(cls, oed_df, column_to_field):
if oed_df[column].dtype.name == 'category' and '' not in oed_df[column].dtype.categories:
oed_df[column] = oed_df[column].cat.add_categories('')
oed_df[column] = oed_df[column] # make a copy of the column in case it is read-only
oed_df.loc[oed_df[column].isin(BLANK_VALUES), column] = ''
oed_df.loc[is_empty(oed_df, column), column] = ''
elif pd_dtype[column].startswith('Int'):
to_tmp_dtype[column] = 'float'

Expand All @@ -302,12 +302,7 @@ def prepare_df(cls, df, column_to_field, ods_fields):
"""
# set default values
for col, field_info in column_to_field.items():
if (field_info
and field_info['Default'] != 'n/a'
and (df[col].isna().any() or (field_info['pd_dtype'] == 'category' and df[col].isnull().any()))):
fill_empty(df, col, df[col].dtype.type(field_info['Default']))
elif df[col].dtype.name == 'category':
fill_empty(df, col, '')
fill_empty(df, col, OedSchema.get_default_from_ods_fields(ods_fields, col))

# add required columns that allow blank values if missing
present_field = set(field_info['Input Field Name'] for field_info in column_to_field.values())
Expand Down
25 changes: 15 additions & 10 deletions ods_tools/oed/validator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import functools
import json
import numpy as np
import logging

from pathlib import Path
from collections.abc import Iterable

from .common import (OdsException, OED_PERIL_COLUMNS, OED_IDENTIFIER_FIELDS, DEFAULT_VALIDATION_CONFIG,
VALIDATOR_ON_ERROR_ACTION, BLANK_VALUES)
VALIDATOR_ON_ERROR_ACTION, BLANK_VALUES, is_empty)
from .oed_schema import OedSchema

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -134,7 +135,7 @@ def check_required_fields(self):
columns = [columns]
for column in columns:
if field_info.get("Allow blanks?").upper() == 'NO':
missing_value_df = oed_source.dataframe[oed_source.dataframe[column].isin(BLANK_VALUES)]
missing_value_df = oed_source.dataframe[is_empty(oed_source.dataframe, column)]
if not missing_value_df.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"column '{column}' has missing values in \n"
Expand Down Expand Up @@ -222,8 +223,9 @@ def check_occupancy_code(self):
if occupancy_code_column is None:
continue
identifier_field = self.identifier_field_maps[oed_source]
invalid_occupancy_code = oed_source.dataframe[~oed_source.dataframe[occupancy_code_column].astype(str).isin(
set(self.exposure.oed_schema.schema['occupancy']) | BLANK_VALUES)]
invalid_occupancy_code = oed_source.dataframe[~(np.isin(oed_source.dataframe[occupancy_code_column].astype(str),
list(self.exposure.oed_schema.schema['occupancy']))
| is_empty(oed_source.dataframe, occupancy_code_column))]
if not invalid_occupancy_code.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"invalid OccupancyCode.\n"
Expand All @@ -242,8 +244,9 @@ def check_construction_code(self):
if construction_code_column is None:
continue
identifier_field = self.identifier_field_maps[oed_source]
invalid_construction_code = oed_source.dataframe[~oed_source.dataframe[construction_code_column].astype(str).isin(
set(self.exposure.oed_schema.schema['construction']) | BLANK_VALUES)]
invalid_construction_code = oed_source.dataframe[~(np.isin(oed_source.dataframe[construction_code_column].astype(str),
list(self.exposure.oed_schema.schema['construction']))
| is_empty(oed_source.dataframe, construction_code_column))]
if not invalid_construction_code.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"invalid ConstructionCode.\n"
Expand All @@ -265,8 +268,9 @@ def check_country_and_area_code(self):
identifier_field = self.identifier_field_maps[oed_source]
area_code_column = self.field_to_column_maps[oed_source].get('AreaCode')
if area_code_column is not None:
country_only_df = oed_source.dataframe[oed_source.dataframe[area_code_column].isin(BLANK_VALUES)]
country_area_df = oed_source.dataframe[~oed_source.dataframe[area_code_column].isin(BLANK_VALUES)]
country_only_df = oed_source.dataframe[is_empty(oed_source.dataframe, area_code_column)]
country_area_df = oed_source.dataframe[~is_empty(oed_source.dataframe, area_code_column)]

invalid_country_area = (country_area_df[
~(country_area_df[[country_code_column, area_code_column]]
.apply(tuple, axis=1)
Expand All @@ -279,8 +283,9 @@ def check_country_and_area_code(self):
f"{invalid_country_area[identifier_field + [country_code_column, area_code_column]]}"})
else:
country_only_df = oed_source.dataframe
invalid_country = (country_only_df[~country_only_df[country_code_column]
.isin(set(self.exposure.oed_schema.schema['country']) | BLANK_VALUES)])
invalid_country = (country_only_df[~(np.isin(country_only_df[country_code_column],
list(self.exposure.oed_schema.schema['country']))
| is_empty(country_only_df, country_code_column))])
if not invalid_country.empty:
invalid_data.append({'name': oed_source.oed_name, 'source': oed_source.current_source,
'msg': f"invalid CountryCode.\n"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))

OED_VERSION = '3.1.0'
OED_VERSION = '3.2.0'
# ORD_VERSION =


Expand Down
Loading