diff --git a/ods_tools/odtf/mapping/file.py b/ods_tools/odtf/mapping/file.py index ef592fe9..44ad1f72 100644 --- a/ods_tools/odtf/mapping/file.py +++ b/ods_tools/odtf/mapping/file.py @@ -434,6 +434,8 @@ def __init__( self._raw_configs: Union[None, Dict[str, RawMappingConfig]] = None self._hydrated_configs: Union[None, Dict[str, FileMappingSpec]] = None + if isinstance(search_paths, str): + search_paths = [search_paths] self.search_paths = [ *(os.path.abspath(p) for p in (search_paths or [])), os.path.abspath(standard_search_path), diff --git a/tests/mapping_test.yaml b/tests/mapping_test.yaml new file mode 100644 index 00000000..6891f67c --- /dev/null +++ b/tests/mapping_test.yaml @@ -0,0 +1,192 @@ +file_type: loc +input_format: + name: Test_input + version: "1.0.0" +output_format: + name: Test_output + version: "1.2.3" +forward: + types: + Input_int_1: + type: int + Input_int_2: + type: int + Input_string_1: + type: string + Input_string_2: + type: string + Input_multistring_1: + type: string + Input_multistring_2: + type: string + Input_float_1: + type: float + Input_float_2: + type: float + Line: + type: int + null_values: + - "''" + - Null + - NULL + transform: + Line: + - transformation: Line + Output_int_1: + - transformation: Input_int_1 + 100 + Output_int_2: + - transformation: Input_int_2 + 10 + Output_int_s: + - transformation: Input_int_1 + Output_string_1: + - transformation: | + replace( + Input_string_1, + 'letter_A','A', + 'letter_B','B', + 'letter_C','C', + 'letter_D','D', + 'letter_E','E', + 'letter_F', 'F', + 'letter_G', 'G', + 'letter_H', 'H', + 'letter_I', 'I', + 'letter_J', 'J' + ) + when: Input_string_1 is not in [Null, 'NULL'] + - transformation: "'THIS WAS NULL'" + Output_string_2: + - transformation: Input_string_2 + Output_multistring_1: + - transformation: | + replace_multiple( + Input_multistring_1, + ',', ';', + 'letter_A','A', + 'letter_B','B', + 'letter_C','C', + 'letter_D','D', + 'letter_E','E', + 'letter_F', 'F', + 'letter_G', 'G', + 'letter_H', 'H', + 'letter_I', 'I', + 'letter_J', 'J' + ) + when: Input_multistring_1 is not in [Null, 'NULL'] + - transformation: "'THIS WAS NULL'" + Output_multistring_1s: + - transformation: Input_multistring_1 + Output_multistring_2: + - transformation: + replace_multiple( + Input_multistring_2, + ',', ';', + 'UK','United Kingdom', + 'ITA','Italy', + 'FRA','France', + 'DEU','Germany', + 'USA','United States', + 'BRA', 'Brasil', + 'SWE', 'Sweden', + 'ESP', 'Spain', + 'SWI', 'Switzerland', + 'ARG', 'Argentina' + ) + when: Input_multistring_2 is not in [Null, 'NULL'] + Output_multistring_2s: + - transformation: Input_multistring_2 + Output_float_1: + - transformation: Input_float_1 * 3.14 + Output_float_2: + - transformation: Input_float_2 / 3.14 + Output_float_s: + - transformation: Input_float_1 +reverse: + types: + Line: + type: int + Output_int_1: + type: int + Output_int_2: + type: int + Output_string_1: + type: string + Output_string_2: + type: string + Output_multistring_1: + type: string + Output_multistring_2: + type: string + Output_float_1: + type: float + Output_float_2: + type: float + null_values: + - "''" + - Null + - NULL + transform: + Line: + - transformation: Line + Input_int_1: + - transformation: Output_int_1 - 100 + Input_int_2: + - transformation: Output_int_2 - 10 + Input_string_1: + - transformation: | + replace( + Output_string_1, + 'letter_A','A', + 'letter_B','B', + 'letter_C','C', + 'letter_D','D', + 'letter_E','E', + 'letter_F', 'F', + 'letter_G', 'G', + 'letter_H', 'H', + 'letter_I', 'I', + 'letter_J', 'J' + ) + when: Output_string_1 is not in [Null, 'NULL'] + Input_string_2: + - transformation: Output_string_2 + Input_multistring_1: + - transformation: | + replace_multiple( + Output_multistring_1, + ';', ',', + 'letter_A','A', + 'letter_B','B', + 'letter_C','C', + 'letter_D','D', + 'letter_E','E', + 'letter_F', 'F', + 'letter_G', 'G', + 'letter_H', 'H', + 'letter_I', 'I', + 'letter_J', 'J' + ) + when: Output_multistring_1 is not in [Null, 'NULL'] + Input_multistring_2: + - transformation: + replace_multiple( + Output_multistring_2, + ';', ',', + 'United Kingdom','UK', + 'Italy','ITA', + 'France','FRA', + 'Germany','DEU', + 'United + States','USA', + 'Brasil', 'BRA', + 'Sweden', 'SWE', + 'Spain', 'ESP', + 'Switzerland', 'SWI', + 'Argentina', 'ARG' + ) + when: Output_multistring_2 is not in [Null, 'NULL'] + Input_float_1: + - transformation: Output_float_1 / 3.14 + Input_float_2: + - transformation: Output_float_2 * 3.14 \ No newline at end of file diff --git a/tests/t_input.csv b/tests/t_input.csv new file mode 100644 index 00000000..4c8dd86f --- /dev/null +++ b/tests/t_input.csv @@ -0,0 +1,11 @@ +Line,Input_int_1,Input_int_2,Input_string_1,Input_string_2,Input_multistring_1,Input_multistring_2,Input_float_1,Input_float_2 +1,10,10,letter_A,letter_A,"letter_A, letter_B, letter_C","UK, ITA",0.5,0.5 +2,20,20,letter_B,letter_B,"letter_A, letter_J","DEU, BRA",1.5,1.5 +3,11,11,letter_C,letter_C,"letter_E, letter_C","FRA,FRA",2.5,2.5 +4,13,13,letter_D,letter_D,letter_H,SWE,3.6,3.6 +5,17,17,letter_E,letter_E,,"ESP, SWE",4.8,4.8 +6,55,55,letter_F,letter_F,"letter_C, letter_I, letter_A",ARG,5.2,5.2 +7,101,101,letter_G,letter_G,"letter_B, letter_E, letter_E",,7.9,7.9 +8,999,999,letter_H,letter_H,"letter_J, letter_I, letter_I","USA, UK",111.11,111.11 +9,777,777,letter_I,letter_I,"letter_G, letter_I, letter_G",Null,0.001,0.001 +10,1,1,,,"letter_B, letter_A, letter_G","ARG, BRA, USA",, diff --git a/tests/test_ods_package.py b/tests/test_ods_package.py index 4d7f2ca3..5ba45f17 100644 --- a/tests/test_ods_package.py +++ b/tests/test_ods_package.py @@ -55,6 +55,10 @@ def _is_non_empty_file(fp): return os.path.getsize(fp) > 0 +def strip_quotes(s): + return s.strip('"') if isinstance(s, str) else s + + class OdsPackageTests(TestCase): @pytest.fixture(autouse=True) def logging_fixtures(self, caplog): @@ -878,101 +882,41 @@ def test_all_analysis_options__in_valid_metrics(self): self.assertEqual(expected_list, global__valid_output_metrics) self.assertEqual(expected_list, event_set__valid_metrics) - def test_transformation_as_expected_loc(self): + def test_transformation_as_expected(self): with tempfile.TemporaryDirectory() as tmp_dir: # Prepare the necessary files for the test config_file_path = pathlib.Path(tmp_dir, 'config.yaml') + with open(config_file_path, 'w') as config_file: yaml.dump({ 'transformations': { 'loc': { 'input_format': { - 'name': 'Cede_Location', - 'version': '10.0.0' + 'name': 'Test_input', + 'version': '1.0.0' }, 'output_format': { - 'name': 'OED_Location', - 'version': '3.0.2' + 'name': 'Test_output', + 'version': '1.2.3' }, 'runner': { 'batch_size': 10000 }, - 'extractor': { + 'mapping': { 'options': { - 'path': str(pathlib.Path(base_test_path, 'loctest_transform_input.csv')), - 'quoting': 'minimal' + 'search_paths': str(pathlib.Path(base_test_path)) } }, - 'loader': { - 'options': { - 'path': str(pathlib.Path(tmp_dir, 'oed_location_output.csv')), - 'quoting': 'minimal' - } - } - } - } - }, config_file) - - # Run the transformation - transform_result = transform_format(str(config_file_path)) - - # Assert the transformation result - assert len(transform_result) == 1 - assert transform_result[0][0] == str(pathlib.Path(tmp_dir, 'oed_location_output.csv')) - assert transform_result[0][1] == 'location' - - output_df = pd.read_csv(transform_result[0][0]) - # expected_output = pd.read_csv(str(pathlib.Path(base_test_path, 'loctest_transform_output.csv'))) - # pd.testing.assert_frame_equal(output_df, expected_output) - expected_values = { - 'AccNumber': [1, 2, 3, 4], - 'ContentsTIV': [4502825, 409903, 5980828, 5219727], - 'FloorArea': [10, 20, 30, 40], - 'LocPeril': ['"XHL;XLT;XSL;WTC;XTD;ZST"', '"XSL;WTC"', '"XTD;ZST"', '"XHL;XLT"'], - 'OccupancyCode': [1104, 1104, 1104, 1104] - } - for column, values in expected_values.items(): - assert output_df[column].tolist() == values - - def test_transformation_as_expected_acc(self): - with tempfile.TemporaryDirectory() as tmp_dir: - # Create a temporary CSV file with the input data for 'acc' - input_acc_path = pathlib.Path(tmp_dir, 'input_acc.csv') - with open(input_acc_path, 'w') as input_acc_file: - input_acc_file.write( - "ContractID,InceptionDate,ExpirationDate,Perils,LayerID,LayerPerils,DedAmt1,AttachmentAmt,SublimitPerils\n" - "1253900,2021-11-29,2022-11-28,4334220,2349611,4334220,25000,50000000,CF\n" - "1253900,2021-11-29,2022-11-28,4334220,2349611,4334220,25000,50000000,CH\n" - "1253901,2021-11-01,2022-10-31,4334220,2349615,4334220,500000,225000000,EQ\n" - ) - - # Prepare the necessary files for the test - config_file_path = pathlib.Path(tmp_dir, 'config.yaml') - with open(config_file_path, 'w') as config_file: - yaml.dump({ - 'transformations': { - 'acc': { - 'input_format': { - 'name': 'Cede_Contract', - 'version': '10.0.0' - }, - 'output_format': { - 'name': 'OED_Contract', - 'version': '3.0.2' - }, - 'runner': { - 'batch_size': 10000 - }, 'extractor': { 'options': { - 'path': str(input_acc_path), + 'path': str(pathlib.Path(base_test_path, 't_input.csv')), 'quoting': 'minimal' } }, 'loader': { 'options': { - 'path': str(pathlib.Path(tmp_dir, 'oed_account_output.csv')), + 'path': str(pathlib.Path(tmp_dir, 't_output.csv')), 'quoting': 'minimal' } } @@ -985,15 +929,28 @@ def test_transformation_as_expected_acc(self): # Assert the transformation result assert len(transform_result) == 1 - assert transform_result[0][0] == str(pathlib.Path(tmp_dir, 'oed_account_output.csv')) - assert transform_result[0][1] == 'account' + assert transform_result[0][0] == str(pathlib.Path(tmp_dir, 't_output.csv')) + assert transform_result[0][1] == 'other' - # Perform assertions on specific columns in the output file output_df = pd.read_csv(transform_result[0][0]) - expected_output = pd.DataFrame({ - 'AccNumber': ['1253900', '1253900', '1253901'], - 'AccPeril': ['4334220', '4334220', '4334220'], - 'CondPeril': ['WSS', 'XCH', 'QEQ'], - 'LayerAttachment': ['50000000.0', '50000000.0', '225000000.0'] - }) - pd.testing.assert_frame_equal(output_df[expected_output.columns].astype(str), expected_output.astype(str)) + + expected_values = { + 'Output_int_1': [110, 120, 111, 113, 117, 155, 201, 1099, 877, 101], + 'Output_int_2': [20, 30, 21, 23, 27, 65, 111, 1009, 787, 11], + 'Output_string_1': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', ''], + 'Output_float_1': [1.57, 4.71, 7.85, 11.304, 15.072, 16.328, 24.806, 348.8854, 0.00314, np.nan], + 'Output_float_2': [0.159235668789809, 0.477707006369427, 0.796178343949045, 1.14649681528662, + 1.52866242038217, 1.65605095541401, 2.51592356687898, 35.3853503184713, + 0.000318471337579618, np.nan], + 'Output_multistring_1': ["A;B;C", "A;J", "E;C", 'H', '', "C;I;A", "B;E;E", "J;I;I", "G;I;G", "B;A;G"], + 'Output_multistring_2': ["United Kingdom;Italy", "Germany;Brasil", "France;France", "Sweden", + "Spain;Sweden", "Argentina", '', "United States;United Kingdom", "Null", + "Argentina;Brasil;United States"] + } + for column, values in expected_values.items(): + if 'float' in column.lower(): + assert np.allclose(output_df[column].tolist(), values, equal_nan=True, rtol=1e-5, atol=1e-5) + elif 'string' in column.lower(): + assert [strip_quotes(s) for s in output_df[column].fillna('').tolist()] == values + else: + assert output_df[column].tolist() == values