diff --git a/nomenclature/__init__.py b/nomenclature/__init__.py index 4733c50d..94d8f68d 100644 --- a/nomenclature/__init__.py +++ b/nomenclature/__init__.py @@ -8,6 +8,7 @@ from nomenclature.codelist import CodeList # noqa from nomenclature.core import process # noqa from nomenclature.countries import countries # noqa +from nomenclature.nuts import nuts # noqa from nomenclature.definition import SPECIAL_CODELIST, DataStructureDefinition # noqa from nomenclature.processor import RegionAggregationMapping # noqa from nomenclature.processor import RegionProcessor, RequiredDataValidator # noqa diff --git a/nomenclature/codelist.py b/nomenclature/codelist.py index d1db586b..7b1bd4f3 100644 --- a/nomenclature/codelist.py +++ b/nomenclature/codelist.py @@ -15,6 +15,8 @@ from nomenclature.code import Code, MetaCode, RegionCode, VariableCode from nomenclature.config import CodeListConfig, NomenclatureConfig from nomenclature.error import ErrorCollector, custom_pydantic_errors, log_error +from nomenclature.nuts import nuts + here = Path(__file__).parent.absolute() @@ -726,6 +728,16 @@ def from_directory( except AttributeError: code_list.append(RegionCode(name=c.name, hierarchy="Country")) + # adding nuts regions + if config.definitions.region.nuts: + for level, countries in config.definitions.region.nuts.items(): + for nuts_region in nuts.get( + level=int(level[-1]), country_code=countries + ): + code_list.append( + RegionCode(name=nuts_region.code, hierarchy="NUTS 2021-2024") + ) + # importing from an external repository for repo in config.definitions.region.repositories: repo_path = config.repositories[repo].local_path / "definitions" / "region" diff --git a/nomenclature/config.py b/nomenclature/config.py index 487101bd..4d20d32e 100644 --- a/nomenclature/config.py +++ b/nomenclature/config.py @@ -41,6 +41,19 @@ def repository_dimension_path(self) -> str: class RegionCodeListConfig(CodeListConfig): country: bool = False + nuts: dict[str, str | list[str]] | None = None + + 
@field_validator("nuts")
@classmethod
def check_nuts(
    cls, v: dict[str, str | list[str]] | None
) -> dict[str, str | list[str]] | None:
    """Validate the keys of the `nuts` region configuration.

    Parameters
    ----------
    v : dict or None
        Value of the `nuts` field: a mapping whose keys name a NUTS level
        and whose values are a single string or a list of strings.

    Returns
    -------
    dict or None
        The value `v`, unchanged.

    Raises
    ------
    ValueError
        If `v` is non-empty and at least one key is not one of
        'nuts-1', 'nuts-2' or 'nuts-3'.
    """
    # Nothing to check when the option is unset (None) or an empty mapping.
    if not v:
        return v

    permitted_levels = ("nuts-1", "nuts-2", "nuts-3")
    for level in v:
        if level not in permitted_levels:
            raise ValueError(
                "Invalid fields for `nuts` in configuration. "
                "Allowed values are: 'nuts-1', 'nuts-2' and 'nuts-3'."
            )
    return v
+optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, + {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, +] + +[package.extras] +docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] +type = ["mypy (>=1.11.2)"] + [[package]] name = "pluggy" version = "1.5.0" @@ -1559,6 +1575,27 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "pooch" +version = "1.8.2" +description = "A friend to fetch your data files" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pooch-1.8.2-py3-none-any.whl", hash = "sha256:3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47"}, + {file = "pooch-1.8.2.tar.gz", hash = "sha256:76561f0de68a01da4df6af38e9955c4c9d1a5c90da73f7e40276a5728ec83d10"}, +] + +[package.dependencies] +packaging = ">=20.0" +platformdirs = ">=2.5.0" +requests = ">=2.19.0" + +[package.extras] +progress = ["tqdm (>=4.41.0,<5.0.0)"] +sftp = ["paramiko (>=2.7.0)"] +xxhash = ["xxhash (>=1.4.3)"] + [[package]] name = "psycopg" version = "3.2.1" @@ -1889,6 +1926,22 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pysquirrel" +version = "1.0" +description = "Python package designed to fetch NUTS administrative divisions" +optional = false +python-versions = "<4.0,>=3.10" +files = [ + {file = "pysquirrel-1.0-py3-none-any.whl", hash = "sha256:d82759a31733ecda1fb045a0a2219fba02b37ac799b836c00aa95bf66f91a400"}, + {file = "pysquirrel-1.0.tar.gz", hash = "sha256:ee79915a7140d159a010752dd1ddb7657ad2cc33f0ae53f8c875f3b7c1dfc273"}, +] + +[package.dependencies] +openpyxl = ">=3.1.5,<4.0.0" +pooch = 
def test_definition_general_config_nuts_only():
    """A DataStructureDefinition can be built from a NUTS-only region config"""
    definitions_dir = TEST_DATA_DIR / "general-config-only-nuts" / "definitions"
    obs = DataStructureDefinition(definitions_dir)

    # The fixture config requests nuts-1 for AT, nuts-2 for BE and nuts-3 for CZ;
    # the values are the number of region codes expected per country prefix.
    expected_count = {"AT": 4, "BE": 12, "CZ": 15}

    # No region outside the three configured countries may be present
    for region in obs.region:
        assert region[:2] in expected_count

    for prefix, count in expected_count.items():
        matching = [region for region in obs.region if region.startswith(prefix)]
        assert len(matching) == count
DataStructureDefinition to file""" file = tmpdir / "testing_export.xlsx"