From 8ce2a8c7fd07f9d863b7aa308529647544c3cc37 Mon Sep 17 00:00:00 2001 From: Nicola Cerutti <94574085+ncerutti@users.noreply.github.com> Date: Mon, 1 Jul 2024 13:06:01 +0100 Subject: [PATCH] Transform air-oed without config file (#124) * enable transformation without config file * fix reverse mappings * comments * pep * format as argument with --format * adjust logger when failing * remove oed-air arg --- ods_tools/main.py | 21 +++- ods_tools/odtf/controller.py | 99 ++++++++++++++++++- .../data/mappings/mapping_loc_Cede-OED.yaml | 8 +- 3 files changed, 116 insertions(+), 12 deletions(-) diff --git a/ods_tools/main.py b/ods_tools/main.py index 1eba074..f76096e 100644 --- a/ods_tools/main.py +++ b/ods_tools/main.py @@ -99,9 +99,13 @@ def convert(**kwargs): def transform(**kwargs): """Wrapper function for transform command. Transform location and account data to a new format (ex: AIR to OED)""" - path_to_config_file = kwargs['config_file'] try: - transform_result = transform_format(path_to_config_file) + if kwargs.get('config_file') is None: + if kwargs.get('format') is None or kwargs.get('input_file') is None or kwargs.get('output_file') is None: + raise OdsException("When --config-file is not provided, --format, --input-file, and --output-file are required.") + + transform_result = transform_format(path_to_config_file=kwargs.get('config_file'), input_file=kwargs.get('input_file'), + output_file=kwargs.get('output_file'), transformation=kwargs.get('format')) if not kwargs.get('nocheck'): for output_file in transform_result: if output_file[1] == 'location' and os.path.isfile(output_file[0]): @@ -109,7 +113,6 @@ def transform(**kwargs): elif output_file[1] == 'account' and os.path.isfile(output_file[0]): check(account=output_file[0]) except OdsException as e: - logger.error("Transformation failed:") logger.error(e) except NameError as e: logger.error("Data transformation package requirements not intalled.") @@ -177,13 +180,21 @@ def add_exposure_data_args(command): 
transform_description = """ Transform data format to/from OED. +This transformation can be done either by providing a config file or directly by specifying the input and output files. +If input and output files are provided, --format (oed-air or air-oed) must be specified to indicate the transformation direction. + +If a config file is provided, the transformation will be done according to the config file. +Please note that the config file allows for more options (batch size, file format, database connection, etc.) """ transform_command = command_parser.add_parser('transform', description=transform_description, formatter_class=argparse.RawTextHelpFormatter) -transform_command.add_argument('--config-file', help='Path to the config file', required=True) +transform_command.add_argument('--config-file', help='Path to the config file') +transform_command.add_argument('-f', "--format", help='Specify which transformation to use (currently oed-air or air-oed)', default=None) +transform_command.add_argument('--input-file', help='Path to the input file', default=None) +transform_command.add_argument('--output-file', help='Path to the output file', default=None) transform_command.add_argument('-v', '--logging-level', help='logging level (debug:10, info:20, warning:30, error:40, critical:50)', default=30, type=int) -transform_command.add_argument('--nocheck', help='if True, OED file will not be checked after transformation', default=False) +transform_command.add_argument('--nocheck', help='if set, OED file will not be checked after transformation', default=False, action='store_true') def main(): diff --git a/ods_tools/odtf/controller.py b/ods_tools/odtf/controller.py index ec04e98..c00eedf 100644 --- a/ods_tools/odtf/controller.py +++ b/ods_tools/odtf/controller.py @@ -11,6 +11,52 @@ from .mapping import BaseMapping from .runner import BaseRunner +# Default versions for OED and AIR when running without a config file +OED_VERSION = "3.0.2" +AIR_VERSION = "10.0.0" +
+FORMAT_MAPPINGS = { + 'oed-air': { + 'input_format': {'name': 'OED_Location', 'version': OED_VERSION}, + 'output_format': {'name': 'Cede_Location', 'version': AIR_VERSION} + }, + 'air-oed': { + 'input_format': {'name': 'Cede_Location', 'version': AIR_VERSION}, + 'output_format': {'name': 'OED_Location', 'version': OED_VERSION} + } +} + +# Default config when running without a config file +BASE_CONFIG = { + "transformations": { + "loc": { + "input_format": { + "name": "", + "version": "" + }, + "output_format": { + "name": "", + "version": "" + }, + "runner": { + "batch_size": 150000 + }, + "extractor": { + "options": { + "path": "", + "quoting": "minimal" + } + }, + "loader": { + "options": { + "path": "", + "quoting": "minimal" + } + } + } + } +} + logger = logging.getLogger(__name__) CONNECTOR_MAPPINGS = { @@ -121,9 +167,56 @@ def _run_transformation(self, config: TransformationConfig): return None -def transform_format(path_to_config_file): - with open(path_to_config_file, 'r') as file: - config_dict = yaml.safe_load(file) +def generate_config(input_file, output_file, transformation_type): + """ + This function generates a config dictionary based on the input parameters. + When running without a config file, this will generate the config dict. + + Args: + input_file (str): path to the input file + output_file (str): path to the output file + transformation_type (str): either 'oed-air' or 'air-oed' + + Raises: + ValueError: if transformation_type is not 'oed-air' or 'air-oed' + + Returns: + dict: the generated config dictionary + """ + if transformation_type not in FORMAT_MAPPINGS: + raise ValueError( + f'Invalid transformation type. Only {list(FORMAT_MAPPINGS.keys())} are supported.' 
+ ) + + loc = {key: dict(value) for key, value in BASE_CONFIG['transformations']['loc'].items()}  # fresh copies: never mutate BASE_CONFIG + loc['input_format'] = dict(FORMAT_MAPPINGS[transformation_type]['input_format']) + loc['output_format'] = dict(FORMAT_MAPPINGS[transformation_type]['output_format']) + loc['extractor']['options'] = dict(loc['extractor']['options'], path=input_file) + loc['loader']['options'] = dict(loc['loader']['options'], path=output_file) + + return {'transformations': {'loc': loc}} + + +def transform_format(path_to_config_file=None, input_file=None, output_file=None, transformation=None): + """This function takes the input parameters when called from ods_tools + and starts the transformation process. Either path_to_config_file or + all three input_file, output_file, and transformation must be provided. + + Args: + path_to_config_file (str): path to the config file. Defaults to None. + input_file (str): path to the input file. Defaults to None. + output_file (str): path to the output file. Defaults to None. + transformation (str): Either 'oed-air' or 'air-oed'. Defaults to None. + + Returns: + list: a list of tuples containing the output file path and the file type. + Used for checking the output files.
+ """ + if path_to_config_file: + with open(path_to_config_file, 'r') as file: + config_dict = yaml.safe_load(file) + else: + config_dict = generate_config(input_file, output_file, transformation) config = Config(config_dict) controller = Controller(config) controller.run() diff --git a/ods_tools/odtf/data/mappings/mapping_loc_Cede-OED.yaml b/ods_tools/odtf/data/mappings/mapping_loc_Cede-OED.yaml index 0526414..527f70d 100644 --- a/ods_tools/odtf/data/mappings/mapping_loc_Cede-OED.yaml +++ b/ods_tools/odtf/data/mappings/mapping_loc_Cede-OED.yaml @@ -2613,10 +2613,10 @@ reverse: - transformation: Chimney City: - transformation: City - Cladding: + WallSidingCode: - transformation: | replace( - WallSidingCode, + Cladding, '0','0', '1','1', '2','2', @@ -2626,7 +2626,7 @@ reverse: '6','6', '7','7', '8','0', - '9','0' + '9','0', '10','0', '11','0', '12','0', @@ -3282,8 +3282,8 @@ reverse: - transformation: | replace_multiple( LocPeril, + ';',',', 'WSS','CF', - ';',',' 'XCH','CH', 'QEQ','EQ', 'QFF','FF',