Skip to content

Commit

Permalink
Transform air-oed without config file (#124)
Browse files Browse the repository at this point in the history
* enable transformation without config file

* fix reverse mappings

* comments

* pep

* format as argument with --format

* adjust logger when failing

* remove oed-air arg
  • Loading branch information
ncerutti authored and sambles committed Jul 1, 2024
1 parent 3ac3167 commit 8ce2a8c
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 12 deletions.
21 changes: 16 additions & 5 deletions ods_tools/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,20 @@ def convert(**kwargs):
def transform(**kwargs):
"""Wrapper function for transform command.
Transform location and account data to a new format (ex: AIR to OED)"""
path_to_config_file = kwargs['config_file']
try:
transform_result = transform_format(path_to_config_file)
if kwargs.get('config_file') is None:
if kwargs.get('format') is None or kwargs.get('input_file') is None or kwargs.get('output_file') is None:
raise OdsException("When --config-file is not provided, --format, --input-file, and --output-file are required.")

transform_result = transform_format(path_to_config_file=kwargs.get('config_file'), input_file=kwargs.get('input_file'),
output_file=kwargs.get('output_file'), transformation=kwargs.get('format'))
if not kwargs.get('nocheck'):
for output_file in transform_result:
if output_file[1] == 'location' and os.path.isfile(output_file[0]):
check(location=output_file[0])
elif output_file[1] == 'account' and os.path.isfile(output_file[0]):
check(account=output_file[0])
except OdsException as e:
logger.error("Transformation failed:")
logger.error(e)
except NameError as e:
logger.error("Data transformation package requirements not intalled.")
Expand Down Expand Up @@ -177,13 +180,21 @@ def add_exposure_data_args(command):

# Help text for the `transform` sub-command. The direction of a config-less
# transformation is selected with -f/--format; there are no --oed-air or
# --air-oed flags on this parser.
transform_description = """
Transform data format to/from OED.
This transformation can be done either by providing a config file or directly by specifying the input and output files.
If input and output files are provided, --format (oed-air or air-oed) must be specified to indicate the transformation direction.
If a config file is provided, the transformation will be done according to the config file.
Please note that the config file allows for more options (batch size, file format, database connection, etc.)
"""
transform_command = command_parser.add_parser('transform', description=transform_description,
formatter_class=argparse.RawTextHelpFormatter)
transform_command.add_argument('--config-file', help='Path to the config file', required=True)
transform_command.add_argument('--config-file', help='Path to the config file')
transform_command.add_argument('-f', "--format", help='Specify which transformation to use (currently oed-air or air-oed)', default=None)
transform_command.add_argument('--input-file', help='Path to the input file', default=None)
transform_command.add_argument('--output-file', help='Path to the output file', default=None)
transform_command.add_argument('-v', '--logging-level', help='logging level (debug:10, info:20, warning:30, error:40, critical:50)',
default=30, type=int)
transform_command.add_argument('--nocheck', help='if True, OED file will not be checked after transformation', default=False)
transform_command.add_argument('--nocheck', help='if True, OED file will not be checked after transformation', default=False, action='store_true')


def main():
Expand Down
99 changes: 96 additions & 3 deletions ods_tools/odtf/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,52 @@
from .mapping import BaseMapping
from .runner import BaseRunner

# Format versions assumed when the transformation runs without a config file.
OED_VERSION = "3.0.2"
AIR_VERSION = "10.0.0"

# For each supported transformation name, the format spec to read from and
# the format spec to write to. Every spec is its own dict object.
FORMAT_MAPPINGS = {
    'oed-air': dict(
        input_format=dict(name='OED_Location', version=OED_VERSION),
        output_format=dict(name='Cede_Location', version=AIR_VERSION),
    ),
    'air-oed': dict(
        input_format=dict(name='Cede_Location', version=AIR_VERSION),
        output_format=dict(name='OED_Location', version=OED_VERSION),
    ),
}

# Skeleton config used when no config file is given. The empty strings are
# placeholders for the format specs and the input/output file paths.
BASE_CONFIG = dict(
    transformations=dict(
        loc=dict(
            input_format=dict(name="", version=""),
            output_format=dict(name="", version=""),
            runner=dict(batch_size=150000),
            extractor=dict(options=dict(path="", quoting="minimal")),
            loader=dict(options=dict(path="", quoting="minimal")),
        ),
    ),
)

logger = logging.getLogger(__name__)

CONNECTOR_MAPPINGS = {
Expand Down Expand Up @@ -121,9 +167,56 @@ def _run_transformation(self, config: TransformationConfig):
return None


def transform_format(path_to_config_file):
with open(path_to_config_file, 'r') as file:
config_dict = yaml.safe_load(file)
def generate_config(input_file, output_file, transformation_type):
    """Build the config dictionary used when no config file is supplied.

    The result is an independent deep copy of BASE_CONFIG with the format
    specs and file paths filled in. Neither BASE_CONFIG nor FORMAT_MAPPINGS
    is modified, so repeated calls do not leak state into each other.

    Args:
        input_file (str): path to the input file
        output_file (str): path to the output file
        transformation_type (str): a key of FORMAT_MAPPINGS
            (currently 'oed-air' or 'air-oed')

    Raises:
        ValueError: if transformation_type is not a key of FORMAT_MAPPINGS

    Returns:
        dict: the generated config dictionary
    """
    # Local import keeps this block self-contained.
    import copy

    if transformation_type not in FORMAT_MAPPINGS:
        raise ValueError(
            f'Invalid transformation type. Only {list(FORMAT_MAPPINGS.keys())} are supported.'
        )

    formats = FORMAT_MAPPINGS[transformation_type]

    # dict.copy() is shallow: the nested dicts would still be the ones inside
    # BASE_CONFIG, so the assignments below would overwrite the module-level
    # template. A deep copy gives this call its own nested structure.
    config_dict = copy.deepcopy(BASE_CONFIG)
    loc_config = config_dict['transformations']['loc']

    # Copy the format specs so the returned config does not alias the
    # shared FORMAT_MAPPINGS entries.
    loc_config['input_format'] = dict(formats['input_format'])
    loc_config['output_format'] = dict(formats['output_format'])
    loc_config['extractor']['options']['path'] = input_file
    loc_config['loader']['options']['path'] = output_file

    return config_dict


def transform_format(path_to_config_file=None, input_file=None, output_file=None, transformation=None):
"""This function takes the input parameters when called from ods_tools
and starts the transformation process. Either path_to_config_file or
all three of input_file, output_file, and transformation must be provided.
Args:
path_to_config_file (str): path to the config file. Defaults to None.
input_file (str): path to the input file. Defaults to None.
output_file (str): path to the output file. Defaults to None.
transformation (str): Either 'oed-air' or 'air-oed'. Defaults to None.
Returns:
list: a list of tuples containing the output file path and the file type.
Used for checking the output files.
"""
if path_to_config_file:
with open(path_to_config_file, 'r') as file:
config_dict = yaml.safe_load(file)
else:
config_dict = generate_config(input_file, output_file, transformation)
config = Config(config_dict)
controller = Controller(config)
controller.run()
Expand Down
8 changes: 4 additions & 4 deletions ods_tools/odtf/data/mappings/mapping_loc_Cede-OED.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2613,10 +2613,10 @@ reverse:
- transformation: Chimney
City:
- transformation: City
Cladding:
WallSidingCode:
- transformation: |
replace(
WallSidingCode,
Cladding,
'0','0',
'1','1',
'2','2',
Expand All @@ -2626,7 +2626,7 @@ reverse:
'6','6',
'7','7',
'8','0',
'9','0'
'9','0',
'10','0',
'11','0',
'12','0',
Expand Down Expand Up @@ -3282,8 +3282,8 @@ reverse:
- transformation: |
replace_multiple(
LocPeril,
';',',',
'WSS','CF',
';',','
'XCH','CH',
'QEQ','EQ',
'QFF','FF',
Expand Down

0 comments on commit 8ce2a8c

Please sign in to comment.