Skip to content

Commit

Permalink
Use NUTS codes to detect country and subdivision
Browse files Browse the repository at this point in the history
  • Loading branch information
Roxane committed Feb 6, 2024
1 parent 8bb50e0 commit 590e665
Show file tree
Hide file tree
Showing 7 changed files with 876 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)

## Unreleased

### Added

- Detection for DE and FR subdivisions using NUTS codes

### Dependencies

- Remove flake8
Expand Down
19 changes: 19 additions & 0 deletions geoconvert/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
import re

from .data import (
ALL_NUTS_CODES,
BR_POSTCODES_RANGE,
CA_POSTCODE_FIRST_LETTER_TO_PROVINCE_CODE,
DE_HAUPTSTADT,
DE_POSTCODE_RANGE,
NUTS_CODES_BY_COUNTRY,
all_nuts_regex,
br_postcode_regex,
br_state_code_regex,
br_state_name_regex,
Expand All @@ -27,6 +30,7 @@
fr_regions,
language_to_capital_names,
language_to_country_names,
nuts_regexes_by_country,
us_postcode_regex,
us_state_code_regex,
us_state_name_regex,
Expand Down Expand Up @@ -150,6 +154,10 @@ def de_address_to_land_code(text):
code_match = re.search(de_land_code_regex, text)
if code_match:
return code_match.group("code").upper()
# Look for NUTS code in the plain text
nuts_match = re.search(nuts_regexes_by_country["DE"], text)
if nuts_match:
return NUTS_CODES_BY_COUNTRY["DE"].get(nuts_match.group().upper())


def de_postcode_to_land_code(text):
Expand Down Expand Up @@ -242,6 +250,10 @@ def fr_address_to_dept_code(text):
code = fr_postcode_to_dept_code(text)
if code is not None:
return code
# Look for NUTS code in the plain text
nuts_match = re.search(nuts_regexes_by_country["FR"], text)
if nuts_match:
return NUTS_CODES_BY_COUNTRY["FR"].get(nuts_match.group().upper())
# Look for the dept name in plain text
return fr_dept_name_to_dept_code(text)

Expand Down Expand Up @@ -658,9 +670,16 @@ def _guess_country_and_subdivision_codes(text, lang=None):
"""
Just guess the subdivision when no country is explicitly given.
"""
# Look for NUTS code in the plain text
nuts_match = re.search(all_nuts_regex, text)
if nuts_match:
nuts_code = nuts_match.group().upper()
return nuts_code[:2], ALL_NUTS_CODES.get(nuts_code)

country_code, subdivision_code = _guess_country_then_subdivision_codes(text, lang)
if country_code is not None:
return country_code, subdivision_code

return _guess_subdivision_then_country_codes(text)


Expand Down
6 changes: 6 additions & 0 deletions geoconvert/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@
de_landers,
de_postcode_regex,
)
from .subdivisions.nuts import (
ALL_NUTS_CODES,
NUTS_CODES_BY_COUNTRY,
all_nuts_regex,
nuts_regexes_by_country,
)
from .subdivisions.united_states import (
us_postcode_regex,
us_state_code_regex,
Expand Down
Loading

0 comments on commit 590e665

Please sign in to comment.