Skip to content

Commit

Permalink
fix/odtf config mappings (#132)
Browse files Browse the repository at this point in the history
* accept single additional path as string pointing to folder

* test custom mappings

* test transform with test mapping file

* pep
  • Loading branch information
ncerutti authored Aug 5, 2024
1 parent 0ef3d35 commit f50d82f
Show file tree
Hide file tree
Showing 4 changed files with 242 additions and 80 deletions.
2 changes: 2 additions & 0 deletions ods_tools/odtf/mapping/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,8 @@ def __init__(

self._raw_configs: Union[None, Dict[str, RawMappingConfig]] = None
self._hydrated_configs: Union[None, Dict[str, FileMappingSpec]] = None
if isinstance(search_paths, str):
search_paths = [search_paths]
self.search_paths = [
*(os.path.abspath(p) for p in (search_paths or [])),
os.path.abspath(standard_search_path),
Expand Down
192 changes: 192 additions & 0 deletions tests/mapping_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# Test mapping for the ODTF transformation round-trip tests.
# Defines a synthetic Test_input -> Test_output format pair with a forward
# and a reverse transform covering ints, strings, multi-value strings and
# floats, plus null handling.
file_type: loc
input_format:
  name: Test_input
  version: "1.0.0"
output_format:
  name: Test_output
  version: "1.2.3"
forward:
  types:
    Input_int_1:
      type: int
    Input_int_2:
      type: int
    Input_string_1:
      type: string
    Input_string_2:
      type: string
    Input_multistring_1:
      type: string
    Input_multistring_2:
      type: string
    Input_float_1:
      type: float
    Input_float_2:
      type: float
    Line:
      type: int
  null_values:
    - "''"
    - Null
    - NULL
  transform:
    Line:
      - transformation: Line
    Output_int_1:
      - transformation: Input_int_1 + 100
    Output_int_2:
      - transformation: Input_int_2 + 10
    Output_int_s:
      - transformation: Input_int_1
    Output_string_1:
      - transformation: |
          replace(
              Input_string_1,
              'letter_A','A',
              'letter_B','B',
              'letter_C','C',
              'letter_D','D',
              'letter_E','E',
              'letter_F', 'F',
              'letter_G', 'G',
              'letter_H', 'H',
              'letter_I', 'I',
              'letter_J', 'J'
          )
        when: Input_string_1 is not in [Null, 'NULL']
      # Fallback applied when the `when` guard above does not match.
      - transformation: "'THIS WAS NULL'"
    Output_string_2:
      - transformation: Input_string_2
    Output_multistring_1:
      - transformation: |
          replace_multiple(
              Input_multistring_1,
              ',', ';',
              'letter_A','A',
              'letter_B','B',
              'letter_C','C',
              'letter_D','D',
              'letter_E','E',
              'letter_F', 'F',
              'letter_G', 'G',
              'letter_H', 'H',
              'letter_I', 'I',
              'letter_J', 'J'
          )
        when: Input_multistring_1 is not in [Null, 'NULL']
      - transformation: "'THIS WAS NULL'"
    Output_multistring_1s:
      - transformation: Input_multistring_1
    Output_multistring_2:
      # Plain multi-line scalar: YAML folds the newlines into spaces,
      # so this parses as a single-line expression.
      - transformation:
          replace_multiple(
          Output_multistring_2_placeholder_never_used,
          ',', ';',
          'UK','United Kingdom',
          'ITA','Italy',
          'FRA','France',
          'DEU','Germany',
          'USA','United States',
          'BRA', 'Brasil',
          'SWE', 'Sweden',
          'ESP', 'Spain',
          'SWI', 'Switzerland',
          'ARG', 'Argentina'
          )
        when: Input_multistring_2 is not in [Null, 'NULL']
    Output_multistring_2s:
      - transformation: Input_multistring_2
    Output_float_1:
      - transformation: Input_float_1 * 3.14
    Output_float_2:
      - transformation: Input_float_2 / 3.14
    Output_float_s:
      - transformation: Input_float_1
reverse:
  types:
    Line:
      type: int
    Output_int_1:
      type: int
    Output_int_2:
      type: int
    Output_string_1:
      type: string
    Output_string_2:
      type: string
    Output_multistring_1:
      type: string
    Output_multistring_2:
      type: string
    Output_float_1:
      type: float
    Output_float_2:
      type: float
  null_values:
    - "''"
    - Null
    - NULL
  transform:
    Line:
      - transformation: Line
    Input_int_1:
      - transformation: Output_int_1 - 100
    Input_int_2:
      - transformation: Output_int_2 - 10
    Input_string_1:
      - transformation: |
          replace(
              Output_string_1,
              'letter_A','A',
              'letter_B','B',
              'letter_C','C',
              'letter_D','D',
              'letter_E','E',
              'letter_F', 'F',
              'letter_G', 'G',
              'letter_H', 'H',
              'letter_I', 'I',
              'letter_J', 'J'
          )
        when: Output_string_1 is not in [Null, 'NULL']
    Input_string_2:
      - transformation: Output_string_2
    Input_multistring_1:
      - transformation: |
          replace_multiple(
              Output_multistring_1,
              ';', ',',
              'letter_A','A',
              'letter_B','B',
              'letter_C','C',
              'letter_D','D',
              'letter_E','E',
              'letter_F', 'F',
              'letter_G', 'G',
              'letter_H', 'H',
              'letter_I', 'I',
              'letter_J', 'J'
          )
        when: Output_multistring_1 is not in [Null, 'NULL']
    Input_multistring_2:
      - transformation:
          replace_multiple(
          Output_multistring_2,
          ';', ',',
          'United Kingdom','UK',
          'Italy','ITA',
          'France','FRA',
          'Germany','DEU',
          'United States','USA',
          'Brasil', 'BRA',
          'Sweden', 'SWE',
          'Spain', 'ESP',
          'Switzerland', 'SWI',
          'Argentina', 'ARG'
          )
        when: Output_multistring_2 is not in [Null, 'NULL']
    Input_float_1:
      - transformation: Output_float_1 / 3.14
    Input_float_2:
      - transformation: Output_float_2 * 3.14
11 changes: 11 additions & 0 deletions tests/t_input.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Line,Input_int_1,Input_int_2,Input_string_1,Input_string_2,Input_multistring_1,Input_multistring_2,Input_float_1,Input_float_2
1,10,10,letter_A,letter_A,"letter_A, letter_B, letter_C","UK, ITA",0.5,0.5
2,20,20,letter_B,letter_B,"letter_A, letter_J","DEU, BRA",1.5,1.5
3,11,11,letter_C,letter_C,"letter_E, letter_C","FRA,FRA",2.5,2.5
4,13,13,letter_D,letter_D,letter_H,SWE,3.6,3.6
5,17,17,letter_E,letter_E,,"ESP, SWE",4.8,4.8
6,55,55,letter_F,letter_F,"letter_C, letter_I, letter_A",ARG,5.2,5.2
7,101,101,letter_G,letter_G,"letter_B, letter_E, letter_E",,7.9,7.9
8,999,999,letter_H,letter_H,"letter_J, letter_I, letter_I","USA, UK",111.11,111.11
9,777,777,letter_I,letter_I,"letter_G, letter_I, letter_G",Null,0.001,0.001
10,1,1,,,"letter_B, letter_A, letter_G","ARG, BRA, USA",,
117 changes: 37 additions & 80 deletions tests/test_ods_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ def _is_non_empty_file(fp):
return os.path.getsize(fp) > 0


def strip_quotes(s):
    """Strip any surrounding double-quote characters from *s*.

    Non-string inputs (e.g. numbers, None, NaN) pass through untouched.
    """
    if not isinstance(s, str):
        return s
    return s.strip('"')


class OdsPackageTests(TestCase):
@pytest.fixture(autouse=True)
def logging_fixtures(self, caplog):
Expand Down Expand Up @@ -878,101 +882,41 @@ def test_all_analysis_options__in_valid_metrics(self):
self.assertEqual(expected_list, global__valid_output_metrics)
self.assertEqual(expected_list, event_set__valid_metrics)

def test_transformation_as_expected_loc(self):
def test_transformation_as_expected(self):
with tempfile.TemporaryDirectory() as tmp_dir:

# Prepare the necessary files for the test
config_file_path = pathlib.Path(tmp_dir, 'config.yaml')

with open(config_file_path, 'w') as config_file:
yaml.dump({
'transformations': {
'loc': {
'input_format': {
'name': 'Cede_Location',
'version': '10.0.0'
'name': 'Test_input',
'version': '1.0.0'
},
'output_format': {
'name': 'OED_Location',
'version': '3.0.2'
'name': 'Test_output',
'version': '1.2.3'
},
'runner': {
'batch_size': 10000
},
'extractor': {
'mapping': {
'options': {
'path': str(pathlib.Path(base_test_path, 'loctest_transform_input.csv')),
'quoting': 'minimal'
'search_paths': str(pathlib.Path(base_test_path))
}
},
'loader': {
'options': {
'path': str(pathlib.Path(tmp_dir, 'oed_location_output.csv')),
'quoting': 'minimal'
}
}
}
}
}, config_file)

# Run the transformation
transform_result = transform_format(str(config_file_path))

# Assert the transformation result
assert len(transform_result) == 1
assert transform_result[0][0] == str(pathlib.Path(tmp_dir, 'oed_location_output.csv'))
assert transform_result[0][1] == 'location'

output_df = pd.read_csv(transform_result[0][0])
# expected_output = pd.read_csv(str(pathlib.Path(base_test_path, 'loctest_transform_output.csv')))
# pd.testing.assert_frame_equal(output_df, expected_output)
expected_values = {
'AccNumber': [1, 2, 3, 4],
'ContentsTIV': [4502825, 409903, 5980828, 5219727],
'FloorArea': [10, 20, 30, 40],
'LocPeril': ['"XHL;XLT;XSL;WTC;XTD;ZST"', '"XSL;WTC"', '"XTD;ZST"', '"XHL;XLT"'],
'OccupancyCode': [1104, 1104, 1104, 1104]
}
for column, values in expected_values.items():
assert output_df[column].tolist() == values

def test_transformation_as_expected_acc(self):
with tempfile.TemporaryDirectory() as tmp_dir:
# Create a temporary CSV file with the input data for 'acc'
input_acc_path = pathlib.Path(tmp_dir, 'input_acc.csv')
with open(input_acc_path, 'w') as input_acc_file:
input_acc_file.write(
"ContractID,InceptionDate,ExpirationDate,Perils,LayerID,LayerPerils,DedAmt1,AttachmentAmt,SublimitPerils\n"
"1253900,2021-11-29,2022-11-28,4334220,2349611,4334220,25000,50000000,CF\n"
"1253900,2021-11-29,2022-11-28,4334220,2349611,4334220,25000,50000000,CH\n"
"1253901,2021-11-01,2022-10-31,4334220,2349615,4334220,500000,225000000,EQ\n"
)

# Prepare the necessary files for the test
config_file_path = pathlib.Path(tmp_dir, 'config.yaml')
with open(config_file_path, 'w') as config_file:
yaml.dump({
'transformations': {
'acc': {
'input_format': {
'name': 'Cede_Contract',
'version': '10.0.0'
},
'output_format': {
'name': 'OED_Contract',
'version': '3.0.2'
},
'runner': {
'batch_size': 10000
},
'extractor': {
'options': {
'path': str(input_acc_path),
'path': str(pathlib.Path(base_test_path, 't_input.csv')),
'quoting': 'minimal'
}
},
'loader': {
'options': {
'path': str(pathlib.Path(tmp_dir, 'oed_account_output.csv')),
'path': str(pathlib.Path(tmp_dir, 't_output.csv')),
'quoting': 'minimal'
}
}
Expand All @@ -985,15 +929,28 @@ def test_transformation_as_expected_acc(self):

# Assert the transformation result
assert len(transform_result) == 1
assert transform_result[0][0] == str(pathlib.Path(tmp_dir, 'oed_account_output.csv'))
assert transform_result[0][1] == 'account'
assert transform_result[0][0] == str(pathlib.Path(tmp_dir, 't_output.csv'))
assert transform_result[0][1] == 'other'

# Perform assertions on specific columns in the output file
output_df = pd.read_csv(transform_result[0][0])
expected_output = pd.DataFrame({
'AccNumber': ['1253900', '1253900', '1253901'],
'AccPeril': ['4334220', '4334220', '4334220'],
'CondPeril': ['WSS', 'XCH', 'QEQ'],
'LayerAttachment': ['50000000.0', '50000000.0', '225000000.0']
})
pd.testing.assert_frame_equal(output_df[expected_output.columns].astype(str), expected_output.astype(str))

expected_values = {
'Output_int_1': [110, 120, 111, 113, 117, 155, 201, 1099, 877, 101],
'Output_int_2': [20, 30, 21, 23, 27, 65, 111, 1009, 787, 11],
'Output_string_1': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', ''],
'Output_float_1': [1.57, 4.71, 7.85, 11.304, 15.072, 16.328, 24.806, 348.8854, 0.00314, np.nan],
'Output_float_2': [0.159235668789809, 0.477707006369427, 0.796178343949045, 1.14649681528662,
1.52866242038217, 1.65605095541401, 2.51592356687898, 35.3853503184713,
0.000318471337579618, np.nan],
'Output_multistring_1': ["A;B;C", "A;J", "E;C", 'H', '', "C;I;A", "B;E;E", "J;I;I", "G;I;G", "B;A;G"],
'Output_multistring_2': ["United Kingdom;Italy", "Germany;Brasil", "France;France", "Sweden",
"Spain;Sweden", "Argentina", '', "United States;United Kingdom", "Null",
"Argentina;Brasil;United States"]
}
for column, values in expected_values.items():
if 'float' in column.lower():
assert np.allclose(output_df[column].tolist(), values, equal_nan=True, rtol=1e-5, atol=1e-5)
elif 'string' in column.lower():
assert [strip_quotes(s) for s in output_df[column].fillna('').tolist()] == values
else:
assert output_df[column].tolist() == values

0 comments on commit f50d82f

Please sign in to comment.