Skip to content

Commit

Permalink
fix/odtf config mappings (#132)
Browse files Browse the repository at this point in the history
* accept single additional path as string pointing to folder

* test custom mappings

* test transform with test mapping file

* pep
  • Loading branch information
ncerutti authored Aug 5, 2024
1 parent 0ef3d35 commit f50d82f
Show file tree
Hide file tree
Showing 4 changed files with 242 additions and 80 deletions.
2 changes: 2 additions & 0 deletions ods_tools/odtf/mapping/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,8 @@ def __init__(

self._raw_configs: Union[None, Dict[str, RawMappingConfig]] = None
self._hydrated_configs: Union[None, Dict[str, FileMappingSpec]] = None
if isinstance(search_paths, str):
search_paths = [search_paths]
self.search_paths = [
*(os.path.abspath(p) for p in (search_paths or [])),
os.path.abspath(standard_search_path),
Expand Down
192 changes: 192 additions & 0 deletions tests/mapping_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# Test mapping for the ODTF transformation round-trip tests.
# Defines a synthetic Test_input -> Test_output format pair with a forward
# and a reverse transform covering ints, strings, multi-value strings and
# floats, plus null handling.
file_type: loc
input_format:
  name: Test_input
  version: "1.0.0"
output_format:
  name: Test_output
  version: "1.2.3"
forward:
  types:
    Input_int_1:
      type: int
    Input_int_2:
      type: int
    Input_string_1:
      type: string
    Input_string_2:
      type: string
    Input_multistring_1:
      type: string
    Input_multistring_2:
      type: string
    Input_float_1:
      type: float
    Input_float_2:
      type: float
    Line:
      type: int
  null_values:
    - "''"
    - Null
    - NULL
  transform:
    Line:
      - transformation: Line
    Output_int_1:
      - transformation: Input_int_1 + 100
    Output_int_2:
      - transformation: Input_int_2 + 10
    Output_int_s:
      - transformation: Input_int_1
    Output_string_1:
      - transformation: |
          replace(
              Input_string_1,
              'letter_A','A',
              'letter_B','B',
              'letter_C','C',
              'letter_D','D',
              'letter_E','E',
              'letter_F', 'F',
              'letter_G', 'G',
              'letter_H', 'H',
              'letter_I', 'I',
              'letter_J', 'J'
          )
        when: Input_string_1 is not in [Null, 'NULL']
      # Fallback applied when the `when` guard above does not match.
      - transformation: "'THIS WAS NULL'"
    Output_string_2:
      - transformation: Input_string_2
    Output_multistring_1:
      - transformation: |
          replace_multiple(
              Input_multistring_1,
              ',', ';',
              'letter_A','A',
              'letter_B','B',
              'letter_C','C',
              'letter_D','D',
              'letter_E','E',
              'letter_F', 'F',
              'letter_G', 'G',
              'letter_H', 'H',
              'letter_I', 'I',
              'letter_J', 'J'
          )
        when: Input_multistring_1 is not in [Null, 'NULL']
      - transformation: "'THIS WAS NULL'"
    Output_multistring_1s:
      - transformation: Input_multistring_1
    Output_multistring_2:
      # Plain multi-line scalar: YAML folds the newlines into spaces,
      # so this parses as a single-line expression.
      - transformation:
          replace_multiple(
          Output_multistring_2_placeholder_never_used,
          ',', ';',
          'UK','United Kingdom',
          'ITA','Italy',
          'FRA','France',
          'DEU','Germany',
          'USA','United States',
          'BRA', 'Brasil',
          'SWE', 'Sweden',
          'ESP', 'Spain',
          'SWI', 'Switzerland',
          'ARG', 'Argentina'
          )
        when: Input_multistring_2 is not in [Null, 'NULL']
    Output_multistring_2s:
      - transformation: Input_multistring_2
    Output_float_1:
      - transformation: Input_float_1 * 3.14
    Output_float_2:
      - transformation: Input_float_2 / 3.14
    Output_float_s:
      - transformation: Input_float_1
reverse:
  types:
    Line:
      type: int
    Output_int_1:
      type: int
    Output_int_2:
      type: int
    Output_string_1:
      type: string
    Output_string_2:
      type: string
    Output_multistring_1:
      type: string
    Output_multistring_2:
      type: string
    Output_float_1:
      type: float
    Output_float_2:
      type: float
  null_values:
    - "''"
    - Null
    - NULL
  transform:
    Line:
      - transformation: Line
    Input_int_1:
      - transformation: Output_int_1 - 100
    Input_int_2:
      - transformation: Output_int_2 - 10
    Input_string_1:
      - transformation: |
          replace(
              Output_string_1,
              'letter_A','A',
              'letter_B','B',
              'letter_C','C',
              'letter_D','D',
              'letter_E','E',
              'letter_F', 'F',
              'letter_G', 'G',
              'letter_H', 'H',
              'letter_I', 'I',
              'letter_J', 'J'
          )
        when: Output_string_1 is not in [Null, 'NULL']
    Input_string_2:
      - transformation: Output_string_2
    Input_multistring_1:
      - transformation: |
          replace_multiple(
              Output_multistring_1,
              ';', ',',
              'letter_A','A',
              'letter_B','B',
              'letter_C','C',
              'letter_D','D',
              'letter_E','E',
              'letter_F', 'F',
              'letter_G', 'G',
              'letter_H', 'H',
              'letter_I', 'I',
              'letter_J', 'J'
          )
        when: Output_multistring_1 is not in [Null, 'NULL']
    Input_multistring_2:
      - transformation:
          replace_multiple(
          Output_multistring_2,
          ';', ',',
          'United Kingdom','UK',
          'Italy','ITA',
          'France','FRA',
          'Germany','DEU',
          'United States','USA',
          'Brasil', 'BRA',
          'Sweden', 'SWE',
          'Spain', 'ESP',
          'Switzerland', 'SWI',
          'Argentina', 'ARG'
          )
        when: Output_multistring_2 is not in [Null, 'NULL']
    Input_float_1:
      - transformation: Output_float_1 / 3.14
    Input_float_2:
      - transformation: Output_float_2 * 3.14
11 changes: 11 additions & 0 deletions tests/t_input.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Line,Input_int_1,Input_int_2,Input_string_1,Input_string_2,Input_multistring_1,Input_multistring_2,Input_float_1,Input_float_2
1,10,10,letter_A,letter_A,"letter_A, letter_B, letter_C","UK, ITA",0.5,0.5
2,20,20,letter_B,letter_B,"letter_A, letter_J","DEU, BRA",1.5,1.5
3,11,11,letter_C,letter_C,"letter_E, letter_C","FRA,FRA",2.5,2.5
4,13,13,letter_D,letter_D,letter_H,SWE,3.6,3.6
5,17,17,letter_E,letter_E,,"ESP, SWE",4.8,4.8
6,55,55,letter_F,letter_F,"letter_C, letter_I, letter_A",ARG,5.2,5.2
7,101,101,letter_G,letter_G,"letter_B, letter_E, letter_E",,7.9,7.9
8,999,999,letter_H,letter_H,"letter_J, letter_I, letter_I","USA, UK",111.11,111.11
9,777,777,letter_I,letter_I,"letter_G, letter_I, letter_G",Null,0.001,0.001
10,1,1,,,"letter_B, letter_A, letter_G","ARG, BRA, USA",,
117 changes: 37 additions & 80 deletions tests/test_ods_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ def _is_non_empty_file(fp):
return os.path.getsize(fp) > 0


def strip_quotes(s):
    """Strip any surrounding double-quote characters from *s*.

    Non-string inputs (e.g. numbers, None, NaN) pass through untouched.
    """
    if not isinstance(s, str):
        return s
    return s.strip('"')


class OdsPackageTests(TestCase):
@pytest.fixture(autouse=True)
def logging_fixtures(self, caplog):
Expand Down Expand Up @@ -878,101 +882,41 @@ def test_all_analysis_options__in_valid_metrics(self):
self.assertEqual(expected_list, global__valid_output_metrics)
self.assertEqual(expected_list, event_set__valid_metrics)

def test_transformation_as_expected_loc(self):
def test_transformation_as_expected(self):
with tempfile.TemporaryDirectory() as tmp_dir:

# Prepare the necessary files for the test
config_file_path = pathlib.Path(tmp_dir, 'config.yaml')

with open(config_file_path, 'w') as config_file:
yaml.dump({
'transformations': {
'loc': {
'input_format': {
'name': 'Cede_Location',
'version': '10.0.0'
'name': 'Test_input',
'version': '1.0.0'
},
'output_format': {
'name': 'OED_Location',
'version': '3.0.2'
'name': 'Test_output',
'version': '1.2.3'
},
'runner': {
'batch_size': 10000
},
'extractor': {
'mapping': {
'options': {
'path': str(pathlib.Path(base_test_path, 'loctest_transform_input.csv')),
'quoting': 'minimal'
'search_paths': str(pathlib.Path(base_test_path))
}
},
'loader': {
'options': {
'path': str(pathlib.Path(tmp_dir, 'oed_location_output.csv')),
'quoting': 'minimal'
}
}
}
}
}, config_file)

# Run the transformation
transform_result = transform_format(str(config_file_path))

# Assert the transformation result
assert len(transform_result) == 1
assert transform_result[0][0] == str(pathlib.Path(tmp_dir, 'oed_location_output.csv'))
assert transform_result[0][1] == 'location'

output_df = pd.read_csv(transform_result[0][0])
# expected_output = pd.read_csv(str(pathlib.Path(base_test_path, 'loctest_transform_output.csv')))
# pd.testing.assert_frame_equal(output_df, expected_output)
expected_values = {
'AccNumber': [1, 2, 3, 4],
'ContentsTIV': [4502825, 409903, 5980828, 5219727],
'FloorArea': [10, 20, 30, 40],
'LocPeril': ['"XHL;XLT;XSL;WTC;XTD;ZST"', '"XSL;WTC"', '"XTD;ZST"', '"XHL;XLT"'],
'OccupancyCode': [1104, 1104, 1104, 1104]
}
for column, values in expected_values.items():
assert output_df[column].tolist() == values

def test_transformation_as_expected_acc(self):
with tempfile.TemporaryDirectory() as tmp_dir:
# Create a temporary CSV file with the input data for 'acc'
input_acc_path = pathlib.Path(tmp_dir, 'input_acc.csv')
with open(input_acc_path, 'w') as input_acc_file:
input_acc_file.write(
"ContractID,InceptionDate,ExpirationDate,Perils,LayerID,LayerPerils,DedAmt1,AttachmentAmt,SublimitPerils\n"
"1253900,2021-11-29,2022-11-28,4334220,2349611,4334220,25000,50000000,CF\n"
"1253900,2021-11-29,2022-11-28,4334220,2349611,4334220,25000,50000000,CH\n"
"1253901,2021-11-01,2022-10-31,4334220,2349615,4334220,500000,225000000,EQ\n"
)

# Prepare the necessary files for the test
config_file_path = pathlib.Path(tmp_dir, 'config.yaml')
with open(config_file_path, 'w') as config_file:
yaml.dump({
'transformations': {
'acc': {
'input_format': {
'name': 'Cede_Contract',
'version': '10.0.0'
},
'output_format': {
'name': 'OED_Contract',
'version': '3.0.2'
},
'runner': {
'batch_size': 10000
},
'extractor': {
'options': {
'path': str(input_acc_path),
'path': str(pathlib.Path(base_test_path, 't_input.csv')),
'quoting': 'minimal'
}
},
'loader': {
'options': {
'path': str(pathlib.Path(tmp_dir, 'oed_account_output.csv')),
'path': str(pathlib.Path(tmp_dir, 't_output.csv')),
'quoting': 'minimal'
}
}
Expand All @@ -985,15 +929,28 @@ def test_transformation_as_expected_acc(self):

# Assert the transformation result
assert len(transform_result) == 1
assert transform_result[0][0] == str(pathlib.Path(tmp_dir, 'oed_account_output.csv'))
assert transform_result[0][1] == 'account'
assert transform_result[0][0] == str(pathlib.Path(tmp_dir, 't_output.csv'))
assert transform_result[0][1] == 'other'

# Perform assertions on specific columns in the output file
output_df = pd.read_csv(transform_result[0][0])
expected_output = pd.DataFrame({
'AccNumber': ['1253900', '1253900', '1253901'],
'AccPeril': ['4334220', '4334220', '4334220'],
'CondPeril': ['WSS', 'XCH', 'QEQ'],
'LayerAttachment': ['50000000.0', '50000000.0', '225000000.0']
})
pd.testing.assert_frame_equal(output_df[expected_output.columns].astype(str), expected_output.astype(str))

expected_values = {
'Output_int_1': [110, 120, 111, 113, 117, 155, 201, 1099, 877, 101],
'Output_int_2': [20, 30, 21, 23, 27, 65, 111, 1009, 787, 11],
'Output_string_1': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', ''],
'Output_float_1': [1.57, 4.71, 7.85, 11.304, 15.072, 16.328, 24.806, 348.8854, 0.00314, np.nan],
'Output_float_2': [0.159235668789809, 0.477707006369427, 0.796178343949045, 1.14649681528662,
1.52866242038217, 1.65605095541401, 2.51592356687898, 35.3853503184713,
0.000318471337579618, np.nan],
'Output_multistring_1': ["A;B;C", "A;J", "E;C", 'H', '', "C;I;A", "B;E;E", "J;I;I", "G;I;G", "B;A;G"],
'Output_multistring_2': ["United Kingdom;Italy", "Germany;Brasil", "France;France", "Sweden",
"Spain;Sweden", "Argentina", '', "United States;United Kingdom", "Null",
"Argentina;Brasil;United States"]
}
for column, values in expected_values.items():
if 'float' in column.lower():
assert np.allclose(output_df[column].tolist(), values, equal_nan=True, rtol=1e-5, atol=1e-5)
elif 'string' in column.lower():
assert [strip_quotes(s) for s in output_df[column].fillna('').tolist()] == values
else:
assert output_df[column].tolist() == values

0 comments on commit f50d82f

Please sign in to comment.