Skip to content

Commit

Permalink
add master/detail multi-sheet support
Browse files Browse the repository at this point in the history
  • Loading branch information
saxix committed Oct 3, 2024
1 parent 8068926 commit 6821161
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 20 deletions.
3 changes: 2 additions & 1 deletion docs/src/.pages
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
nav:
- Home: index.md
- index.md
- install.md
- contributing.md
- usage.md
26 changes: 13 additions & 13 deletions docs/src/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,23 @@ Let's imagine a simple `test.xlsx` file with this structure
| Mary | Error | X |

1. Let's start by creating the validation rules (shown here in code; alternatively, you can use the admin interface)


fs, __ = Fieldset.objects.get_or_create(name="test.xlsx")

charfield = FieldDefinition.objects.get(field_type=forms.CharField)
choicefield = FieldDefinition.objects.get(field_type=forms.ChoiceField)

FlexField.objects.get_or_create(name="name", fieldset=fs, field=charfield)
FlexField.objects.get_or_create(name="last_name", fieldset=fs, field=charfield)
FlexField.objects.get_or_create(name="gender", fieldset=fs, field=choicefield,
attrs={"choices": [["M", "M"], ["F", "F"]]})
fs, __ = Fieldset.objects.get_or_create(name="test.xlsx")
charfield = FieldDefinition.objects.get(field_type=forms.CharField)
choicefield = FieldDefinition.objects.get(field_type=forms.ChoiceField)
FlexField.objects.get_or_create(name="name", fieldset=fs, field=charfield)
FlexField.objects.get_or_create(name="last_name", fieldset=fs, field=charfield)
FlexField.objects.get_or_create(name="gender", fieldset=fs, field=choicefield,
attrs={"choices": [["M", "M"], ["F", "F"]]})


2. Validate the file against it

errors = validate_xls(xls_simple, fs, fail_if_alien=True)
print(errors)
errors = validate_xls(xls_simple, fs, fail_if_alien=True)
print(errors)

```python
{3: {'gender': ['Select a valid choice. X is not one of the available choices.']}}
Expand Down
34 changes: 28 additions & 6 deletions src/hope_smart_import/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,38 @@
from openpyxl.worksheet.worksheet import Worksheet


def import_simple_xls(filepath: str, sheet_index: int = 0, start_at: int = 0) -> Generator[dict[str, Any], None, None]:
    """Yield the data rows of one worksheet as ``{header: value}`` dicts.

    The first row of the selected sheet supplies the keys (each header value
    is passed through ``str()``); every following row is zipped against them.

    Args:
        filepath: Path of the workbook, handed to ``openpyxl.load_workbook``.
        sheet_index: Position of the worksheet inside ``wb.worksheets``.
        start_at: Number of data rows (counted after the header row) to skip
            before the first record is yielded.

    Yields:
        One dict per remaining row. ``zip`` stops at the shorter side, so a
        row with more cells than there are headers loses the extra cells.
        A sheet with no rows at all yields nothing.
    """
    wb: "Workbook" = openpyxl.load_workbook(filepath)
    sh: "Worksheet" = wb.worksheets[sheet_index]
    rows = sh.rows
    # A bare ``next(rows)`` on an empty sheet would raise StopIteration inside
    # this generator, which PEP 479 converts into a RuntimeError. Treat a
    # sheet without a header row as "no records" instead.
    header_row = next(rows, None)
    if header_row is None:
        return
    headers = [str(cell.value) for cell in header_row]
    for i, row in enumerate(rows):
        if i < start_at:
            continue
        yield dict(zip(headers, (cell.value for cell in row)))  # pragma: no branch


def validate_xls(
    filepath: str,
    checker: DataChecker | Fieldset,
    sheet_index: int = 0,
    fail_if_alien: bool = False,
    start_at: int = 0,
) -> Generator[dict[str, Any], None, None]:
    """Validate the rows of a single worksheet with ``checker``.

    Args:
        filepath: Path of the Excel workbook to read.
        checker: Object whose ``validate`` method receives the row records.
        sheet_index: Which worksheet to read; forwarded to
            ``import_simple_xls``.
        fail_if_alien: Forwarded unchanged to ``checker.validate``.
        start_at: Number of data rows to skip; forwarded to
            ``import_simple_xls``.

    Returns:
        Whatever ``checker.validate`` returns.
        NOTE(review): the annotation says ``Generator``, but the usage docs
        print the result as a dict keyed by row number - confirm and adjust.
    """
    records = import_simple_xls(filepath, sheet_index=sheet_index, start_at=start_at)
    return checker.validate(records, fail_if_alien=fail_if_alien)


def validate_xls_multi(
    filepath: str,
    checkers: list[DataChecker | Fieldset],
    fail_if_alien: bool = False,
    start_at: int = 0,
) -> dict[str, list[dict[str, Any]]]:
    """Validate several worksheets of one workbook, one checker per sheet.

    The checker at position ``i`` of ``checkers`` is applied to the worksheet
    at index ``i``. Checkers run in list order.

    Args:
        filepath: Path of the Excel workbook to read.
        checkers: Validators, ordered like the worksheets they apply to.
        fail_if_alien: Forwarded unchanged to every ``checker.validate`` call.
        start_at: Number of data rows to skip on every sheet.

    Returns:
        A dict mapping each ``checker.name`` to the result of its
        ``validate`` call. Two checkers sharing a name keep only the later
        result.
    """
    return {
        checker.name: checker.validate(
            import_simple_xls(filepath, sheet_index=position, start_at=start_at),
            fail_if_alien=fail_if_alien,
        )
        for position, checker in enumerate(checkers)
    }
Binary file added tests/data/missing_master.xlsx
Binary file not shown.
Empty file added tests/test_integration.py
Empty file.
87 changes: 87 additions & 0 deletions tests/test_rdi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# mypy: disable-error-code="no-untyped-def"
from pathlib import Path
from typing import Any
from django import forms
import pytest

from demo.factories import FieldsetFactory, FlexFieldFactory, FieldDefinitionFactory
from hope_flex_fields.models import Fieldset

from hope_smart_import.utils import validate_xls, validate_xls_multi


@pytest.fixture()
def xls_rdi() -> str:
    """Return the absolute path of ``data/rdi1.xlsx`` next to this test module."""
    workbook = Path(__file__).parent.joinpath("data", "rdi1.xlsx")
    return str(workbook.absolute())


@pytest.fixture()
def xls_missing_master() -> str:
    """Return the absolute path of ``data/missing_master.xlsx`` next to this test module."""
    workbook = Path(__file__).parent.joinpath("data", "missing_master.xlsx")
    return str(workbook.absolute())


@pytest.fixture()
def hh_validator(db: Any) -> Fieldset:
    """Build the ``household`` fieldset with one default flex field per column."""
    fs = FieldsetFactory(name="household")
    # Created in this exact order, one factory call per column name.
    column_names = (
        "household_id",
        "consent_h_c",
        "country_origin_h_c",
        "country_h_c",
        "admin1_h_c",
        "admin2_h_c",
        "size_h_c",
        "hh_latrine_h_f",
        "hh_electricity_h_f",
        "registration_method_h_c",
        "collect_individual_data_h_c",
        "name_enumerator_h_c",
        "org_enumerator_h_c",
        "consent_sharing_h_c",
        "first_registration_date_h_c",
    )
    for column in column_names:
        FlexFieldFactory(name=column, fieldset=fs)
    return fs


@pytest.fixture()
def ind_validator(db: Any) -> Fieldset:
    """Build the ``individual`` fieldset.

    Most columns use the factory's default field; ``gender_i_c`` and
    ``disability_i_c`` are choice fields and ``birth_date_i_c`` is a date
    field. Creation order matches the column order below.
    """
    fs = FieldsetFactory(name="individual")

    leading_plain = (
        "household_id",
        "relationship_i_c",
        "full_name_i_c",
        "given_name_i_c",
        "middle_name_i_c",
        "family_name_i_c",
        "photo_i_c",
    )
    for column in leading_plain:
        FlexFieldFactory(name=column, fieldset=fs)

    gender_definition = FieldDefinitionFactory(field_type=forms.ChoiceField)
    FlexFieldFactory(
        name="gender_i_c",
        fieldset=fs,
        field=gender_definition,
        attrs={"choices": [["FEMALE", "FEMALE"], ["MALE", "MALE"]]},
    )

    date_definition = FieldDefinitionFactory(field_type=forms.DateField)
    FlexFieldFactory(name="birth_date_i_c", fieldset=fs, field=date_definition)

    trailing_plain = (
        "estimated_birth_date_i_c",
        "national_id_no_i_c",
        "national_id_photo_i_c",
        "national_id_issuer_i_c",
        "phone_no_i_c",
        "primary_collector_id",
        "alternate_collector_id",
        "first_registration_date_i_c",
    )
    for column in trailing_plain:
        FlexFieldFactory(name=column, fieldset=fs)

    disability_definition = FieldDefinitionFactory(field_type=forms.ChoiceField)
    FlexFieldFactory(
        name="disability_i_c",
        fieldset=fs,
        field=disability_definition,
        attrs={"choices": [["not disabled", "not disabled"], ["disabled", "disabled"]]},
    )
    return fs


def test_validate_simple(xls_rdi: str, hh_validator: Fieldset, ind_validator: Fieldset) -> None:
    """A master/detail workbook validates with empty results for both sheets."""
    hh_validator.set_primary_key_col("household_id")
    ind_validator.set_master(hh_validator, "household_id")

    # Sheet 0 is checked by the household fieldset, sheet 1 by the individual one.
    checkers = [hh_validator, ind_validator]
    outcome = validate_xls_multi(xls_rdi, checkers, fail_if_alien=True)

    assert outcome == {"household": {}, "individual": {}}


def test_validate_missing_master(xls_missing_master: str, hh_validator: Fieldset, ind_validator: Fieldset) -> None:
    """An individual row pointing at an unknown household is reported against that row."""
    hh_validator.set_primary_key_col("household_id")
    ind_validator.set_master(hh_validator, "household_id")

    checkers = [hh_validator, ind_validator]
    outcome = validate_xls_multi(xls_missing_master, checkers, fail_if_alien=True, start_at=1)

    expected_individual = {1: {"-": ["'missing' not found in master"]}}
    assert outcome["individual"] == expected_individual
assert errors['household'] == {}

0 comments on commit 6821161

Please sign in to comment.