Commit 4cc1781

Merge branch 'main' into feature/validationpath

ncerutti authored Sep 19, 2024
2 parents 508e5a8 + 611dec6 commit 4cc1781

Showing 12 changed files with 102 additions and 83 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/build.yml
@@ -80,7 +80,7 @@ jobs:
       - name: Download OED spec
         if: inputs.oed_spec_json != '' || inputs.oed_spec_branch != ''
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: extracted_spec
           path: ${{ github.workspace }}/
@@ -100,15 +100,15 @@ jobs:
           echo "source=$SRC" >> $GITHUB_OUTPUT
       - name: Store source package
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: src_package
           path: ./dist/${{ steps.build_package.outputs.source }}
           retention-days: 5

       - name: Store Wheel package
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: bin_package
           path: ./dist/${{ steps.build_package.outputs.wheel }}
-          retention-days: 5
+          retention-days: 5
4 changes: 2 additions & 2 deletions .github/workflows/publish.yml
@@ -157,13 +157,13 @@ jobs:
       # --- Sign packages --- #
       - name: Download Source package
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: src_package
           path: ${{ github.workspace }}/

       - name: Download Linux package
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: bin_package
           path: ${{ github.workspace }}/
2 changes: 1 addition & 1 deletion .github/workflows/test-oasislmf.yml
@@ -47,7 +47,7 @@ jobs:
           pip install -r requirements.txt
       - name: Download ods-tools package
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: bin_package
           path: ${{ github.workspace }}/
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
@@ -38,13 +38,13 @@ jobs:
           python-version: ${{ matrix.python-version }}

       - name: Download package (ODS-tools)
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: bin_package
           path: ${{ github.workspace }}/

       - name: Download package (OasisDataManager)
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: odm_bin_package
           path: ${{ github.workspace }}/
6 changes: 6 additions & 0 deletions ods_tools/data/analysis_settings_schema.json
@@ -523,6 +523,12 @@
         "description": "If true apply post loss amplification/reduction to losses.",
         "default": false
       },
+      "do_disaggregation": {
+        "type": "boolean",
+        "title": "Apply Oasis Disaggregation",
+        "description": "Set whether to split terms and conditions for aggregate exposure (optional)",
+        "default": false
+      },
       "pla_secondary_factor": {
         "type": "number",
         "title": "Optional secondary factor for Post Loss Amplification",
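Note: the same `do_disaggregation` flag is added to both the analysis and model settings schemas (the model settings file follows below). For reference, here is a minimal sketch of checking a settings fragment against the updated schema with the third-party `jsonschema` package; the settings dict is a hypothetical example, and if the full schema marks other fields as required they would need to be supplied as well.

```python
import json

from jsonschema import validate  # third-party: pip install jsonschema

# Hypothetical settings fragment exercising the newly added flag.
analysis_settings = {
    "do_disaggregation": False,  # keep aggregate exposure terms unsplit
}

with open("ods_tools/data/analysis_settings_schema.json") as f:
    schema = json.load(f)

# Raises jsonschema.exceptions.ValidationError on a type mismatch,
# e.g. "do_disaggregation": "yes" would fail the boolean check.
validate(instance=analysis_settings, schema=schema)
```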
6 changes: 6 additions & 0 deletions ods_tools/data/model_settings_schema.json
@@ -617,6 +617,12 @@
         "title": "Apply Post Loss Amplification",
         "description": "If true apply post loss amplification/reduction to losses by default.",
         "default": false
+      },
+      "do_disaggregation": {
+        "type": "boolean",
+        "title": "Apply Oasis Disaggregation",
+        "description": "Set whether to split terms and conditions for aggregate exposure (optional)",
+        "default": false
       },
       "string_parameters":{
         "title":"Single string paramters",
54 changes: 24 additions & 30 deletions ods_tools/odtf/connector/csv.py
@@ -1,6 +1,6 @@
-import csv
 from typing import Any, Dict, Iterable

+import numpy as np
 import pandas as pd

 from .base import BaseConnector
@@ -63,39 +63,33 @@ def __init__(self, config, **options):

         self.file_path = config.absolute_path(options["path"])
         self.write_header = options.get("write_header", True)
-        self.quoting = {
-            "all": csv.QUOTE_ALL,
-            "minimal": csv.QUOTE_MINIMAL,
-            "none": csv.QUOTE_NONE,
-            "nonnumeric": csv.QUOTE_NONNUMERIC,
-        }.get(options.get("quoting", "nonnumeric"))

     def _data_serializer(self, row):
-        return {
-            k: f'"{v}"'.strip() if isinstance(v, str) and any(d in v for d in [',', ';', '\t', '\n', '"']) else (
-                v if v is not None and not isinstance(v, NotSetType) else "")
-            for k, v in row.items()
-        }
+        def process_value(v):
+            if v is None or isinstance(v, NotSetType):
+                return ""
+            # add quotes to values that contain special characters
+            if isinstance(v, str) and any(d in v for d in [',', ';', '\t', '\n', '"']):
+                return f'"{v}"'
+            return str(v)
+
+        return [process_value(v) for v in row.values()]

     def load(self, data: Iterable[Dict[str, Any]]):
-        try:
-            data = iter(data)
-            first_row = next(data)
-        except StopIteration:
-            return
-
-        with open(self.file_path, "w", newline="") as f:
-            writer = csv.DictWriter(
-                f,
-                fieldnames=list(first_row.keys()),
-                quoting=self.quoting
-            )
-
-            if self.write_header:
-                writer.writeheader()
-
-            writer.writerow(self._data_serializer(first_row))
-            writer.writerows(map(self._data_serializer, data))
+        first_batch = True
+
+        with open(self.file_path, "a", newline="") as f:
+            for batch in data:
+                fieldnames = list(batch.keys())
+
+                rows = np.array([self._data_serializer(batch)])
+
+                if first_batch and self.write_header:
+                    header = ','.join(fieldnames)
+                    np.savetxt(f, [], fmt='%s', delimiter=',', header=header, comments='')
+                    first_batch = False
+
+                np.savetxt(f, rows, fmt='%s', delimiter=',', comments='')

     def fetch_data(self, chunksize: int) -> Iterable[pd.DataFrame]:
         """
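Note: a standalone sketch of the quoting rule the rewritten `_data_serializer` applies, with `NotSetType` stubbed out and an illustrative row. Since `np.savetxt` writes fields verbatim with no quoting of its own, this pre-quoting is what keeps embedded delimiters intact in the output.

```python
class NotSetType:
    """Stand-in for the ods_tools sentinel of the same name."""

def process_value(v):
    if v is None or isinstance(v, NotSetType):
        return ""
    # Pre-quote values that would otherwise break the CSV layout,
    # because np.savetxt does not quote fields itself.
    if isinstance(v, str) and any(d in v for d in [',', ';', '\t', '\n', '"']):
        return f'"{v}"'
    return str(v)

row = {"LocNumber": 1, "LocPerilsCovered": "WTC, WSS", "Note": None}
print([process_value(v) for v in row.values()])
# -> ['1', '"WTC, WSS"', '']
```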
73 changes: 39 additions & 34 deletions ods_tools/odtf/runner/pandas.py
@@ -438,37 +438,42 @@ def transform(
         total_rows = 0
         transformations = []

-        for batch in extractor.fetch_data(batch_size):
-            # Calculates the set of transformations from the columns in the
-            # first batch (to avoid double-querying)
-            if not transformations:
-                available_columns = set(batch.columns)
-                transformations = mapping.get_transformations(available_columns=available_columns)
-                logger.info(f"Running transformation set {transformations[0].input_format} -> {transformations[-1].output_format} [{extractor.name}]")
-
-            # Validate input data
-            try:
-                validator.run(self.coerce_row_types(batch, transformations[0].types),
-                              mapping.input_format.name, mapping.input_format.version, mapping.file_type)
-            except KeyError as e:
-                logger.warning(f"Validation failed due to a missing column: {e}")
-            except Exception as e:
-                logger.warning(f"Validation failed: {e}")
-
-            # Apply the transformations to the batch
-            for transformation in transformations:
-                batch = self.apply_transformation_set(batch, transformation)
-
-            # Validate output data
-            try:
-                validator.run(batch, mapping.output_format.name, mapping.output_format.version, mapping.file_type)
-            except KeyError as e:
-                logger.warning(f"Validation failed due to a missing column: {e}")
-            except Exception as e:
-                logger.warning(f"Validation failed: {e}")
-
-            # Log the transformation progress
-            total_rows += len(batch)
-            logger.info(f"Processed {len(batch)} rows in the current batch (total: {total_rows})")
-
-            yield from (r.to_dict() for idx, r in batch.iterrows())
+        try:
+            for batch in extractor.fetch_data(batch_size):
+                # Calculates the set of transformations from the columns in the
+                # first batch (to avoid double-querying)
+                if not transformations:
+                    available_columns = set(batch.columns)
+                    transformations = mapping.get_transformations(available_columns=available_columns)
+                    logger.info(f"Running transformation set {transformations[0].input_format} -> {transformations[-1].output_format} [{extractor.name}]")
+
+                # Validate input data
+                try:
+                    validator.run(self.coerce_row_types(batch, transformations[0].types),
+                                  mapping.input_format.name, mapping.input_format.version, mapping.file_type)
+                except KeyError as e:
+                    logger.warning(f"Validation failed due to a missing column: {e}")
+                except Exception as e:
+                    logger.warning(f"Validation failed: {e}")
+
+                # Apply the transformations to the batch
+                for transformation in transformations:
+                    batch = self.apply_transformation_set(batch, transformation)
+
+                # Validate output data
+                try:
+                    validator.run(batch, mapping.output_format.name, mapping.output_format.version, mapping.file_type)
+                except KeyError as e:
+                    logger.warning(f"Validation failed due to a missing column: {e}")
+                except Exception as e:
+                    logger.warning(f"Validation failed: {e}")
+
+                # Log the transformation progress
+                total_rows += len(batch)
+                logger.info(f"Processed {len(batch)} rows in the current batch (total: {total_rows})")
+
+                yield from (r.to_dict() for idx, r in batch.iterrows())
+        except FileNotFoundError:
+            logger.error(f"File not found: {extractor.file_path}")
+        except Exception as e:
+            logger.error(f"Error processing batch: {e}")
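Note: a minimal sketch of the guarded-generator pattern this change introduces, with illustrative names: the whole batch loop now sits inside one try block, so a missing input file or a mid-batch failure is logged rather than raised.

```python
import logging

logger = logging.getLogger(__name__)

def transform_batches(fetch_batches, apply_transformations, file_path):
    """Yield transformed rows, logging instead of raising on failure."""
    try:
        for batch in fetch_batches():
            for row in apply_transformations(batch):
                yield row
    except FileNotFoundError:
        logger.error(f"File not found: {file_path}")
    except Exception as e:
        # Any mid-stream failure ends the generator after logging.
        logger.error(f"Error processing batch: {e}")
```

One trade-off of this design: because the generator swallows the exception, a consumer sees a silently truncated stream rather than an error, so the log is the only record that something failed.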
18 changes: 15 additions & 3 deletions ods_tools/odtf/transformers/transform.py
@@ -6,7 +6,7 @@
 from typing import Any, Callable, Iterable, List, Pattern, TypedDict, Union

 from lark import Transformer as _LarkTransformer
-from lark import Tree
+from lark import Token, Tree
 from lark import exceptions as lark_exceptions
 from lark import v_args
 from ..transformers.transform_utils import replace_multiple
@@ -452,7 +452,9 @@ def boolean(self, value):
         :return: True if the value is "True", False otherwise
         """
-        return value == "True"
+        if isinstance(value, bool):
+            return value
+        return str(value).lower() == "true"

     def null(self, value):
         """
@@ -479,6 +481,16 @@ def number(self, value):
         return float(value)


+def safe_lookup(r, name):
+    if isinstance(name, Token):
+        name = name.value
+    if name == 'True':
+        return True
+    elif name == 'False':
+        return False
+    return r.get(name, name)
+
+
 def create_transformer_class(row, transformer_mapping):
     """
     Creates a transformer class from the provided mapping overrides.
@@ -489,7 +501,7 @@ def create_transformer_class(row, transformer_mapping):
     :return: The new transformer class
     """
     transformer_mapping = {
-        "lookup": lambda r, name: r[name],
+        "lookup": safe_lookup,
         "add": lambda r, lhs, rhs: add(lhs, rhs),
         "subtract": lambda r, lhs, rhs: sub(lhs, rhs),
         "multiply": lambda r, lhs, rhs: mul(lhs, rhs),
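Note: the new `safe_lookup` replaces the bare `lambda r, name: r[name]`, which raised `KeyError` whenever the grammar handed it a boolean literal or a Lark `Token` rather than a column name. A standalone sketch of its behaviour, with an illustrative row:

```python
from lark import Token  # lark is already a dependency of this module

def safe_lookup(r, name):
    if isinstance(name, Token):
        name = name.value              # unwrap a Lark Token to its string value
    if name == 'True':
        return True
    elif name == 'False':
        return False
    return r.get(name, name)           # fall back to the literal itself

row = {"LocPerilsCovered": "WTC"}
print(safe_lookup(row, "LocPerilsCovered"))     # 'WTC' (column lookup)
print(safe_lookup(row, Token("NAME", "True")))  # True (boolean literal)
print(safe_lookup(row, "CountryCode"))          # 'CountryCode' (no such column)
```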
5 changes: 0 additions & 5 deletions tests/loctest_transform_input.csv

This file was deleted.

2 changes: 1 addition & 1 deletion tests/t_input.csv
@@ -7,5 +7,5 @@ Line,Input_int_1,Input_int_2,Input_string_1,Input_string_2,Input_multistring_1,I
 6,55,55,letter_F,letter_F,"letter_C, letter_I, letter_A",ARG,5.2,5.2
 7,101,101,letter_G,letter_G,"letter_B, letter_E, letter_E",,7.9,7.9
 8,999,999,letter_H,letter_H,"letter_J, letter_I, letter_I","USA, UK",111.11,111.11
-9,777,777,letter_I,letter_I,"letter_G, letter_I, letter_G",Null,0.001,0.001
+9,777,777,letter_I,letter_I,"letter_G, letter_I, letter_G",,0.001,0.001
 10,1,1,,,"letter_B, letter_A, letter_G","ARG, BRA, USA",,
3 changes: 2 additions & 1 deletion tests/test_ods_package.py
@@ -948,9 +948,10 @@ def test_transformation_as_expected(self):
                                    0.000318471337579618, np.nan],
             'Output_multistring_1': ["A;B;C", "A;J", "E;C", 'H', '', "C;I;A", "B;E;E", "J;I;I", "G;I;G", "B;A;G"],
             'Output_multistring_2': ["United Kingdom;Italy", "Germany;Brasil", "France;France", "Sweden",
-                                     "Spain;Sweden", "Argentina", '', "United States;United Kingdom", "Null",
+                                     "Spain;Sweden", "Argentina", '', "United States;United Kingdom", '',
                                      "Argentina;Brasil;United States"]
         }
+
         for column, values in expected_values.items():
             if 'float' in column.lower():
                 assert np.allclose(output_df[column].tolist(), values, equal_nan=True, rtol=1e-5, atol=1e-5)
