Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Units #10

Merged
merged 33 commits into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
920ac7f
unit conversions
siligam Jun 27, 2024
c769e44
added unit test for units
siligam Jun 27, 2024
36009af
refactored code to better utilize pint library for addressing unit no…
siligam Jul 1, 2024
bc5fe66
code cleanup (units.py)
siligam Jul 2, 2024
d9a22bf
Ruff formatted
siligam Jul 2, 2024
191f859
Update src/pymorize/units.py
siligam Jul 3, 2024
5e0a470
f-string formatting
siligam Jul 4, 2024
1993a73
renamed convert function to a more human-reabale name
siligam Jul 4, 2024
ad901e9
renamed fix_exponent_notation and fix_power_notation as suggest
siligam Jul 4, 2024
b45167a
Update tests/test_units.py
siligam Jul 16, 2024
4296c69
Update src/pymorize/units.py
siligam Jul 16, 2024
901b87b
refactored code
siligam Jul 17, 2024
84b4c4f
isort
siligam Jul 17, 2024
5a78f28
added chemicals package dependency
siligam Jul 17, 2024
54329f2
relocated documentation to the top of the file.
siligam Jul 17, 2024
ad24e1f
F-String in normalize_power_notation
pgierz Jul 17, 2024
68c16f2
cleanup: removes units_en.txt file
pgierz Jul 17, 2024
6132352
chore(units.py): fix all print statements for loguru
pgierz Jul 17, 2024
628fcaa
feat(units.py):include geographic coordinates for east/west and north…
pgierz Jul 17, 2024
0dad97c
style(units.py): applies black formatter
pgierz Jul 17, 2024
357b070
fix(test_units): ensure that the test uses the module's unit registry…
pgierz Jul 17, 2024
4f74ba0
fix: corrects test for undefined weight conversion
pgierz Jul 17, 2024
98c9df9
fix(units.py): log conversion factor separately
pgierz Jul 17, 2024
d265acc
style: f-strings everywhere in units.py
pgierz Jul 17, 2024
178641f
fix: silly mistake in f-string
pgierz Jul 17, 2024
aaf0f9b
refactored chemical elements
siligam Jul 17, 2024
da7e624
added support for xr.DataArray
siligam Jul 20, 2024
fa6b796
Update tests/test_units.py
siligam Jul 22, 2024
f9d8264
Update src/pymorize/units.py
siligam Jul 22, 2024
9bb9e59
code cleanup
siligam Jul 22, 2024
a61aa91
Using Union instead of | for type hints to support python3.9
siligam Jul 23, 2024
67c2ac6
Merge branch 'main' into units
siligam Jul 23, 2024
221d308
added units to pipeline
siligam Jul 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def read(filename):
"rich-click",
"versioneer",
"xarray",
"chemicals"
],
development_requires=[
"black",
Expand Down
1 change: 1 addition & 0 deletions src/pymorize/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,4 +197,5 @@ def __init__(self):
super().__init__(
# FIXME: Fill in with appropriate steps
get_callable_by_name("pymorize.generic.load_data"),
get_callable_by_name("pymorize.units.handle_unit_conversion"),
)
75 changes: 75 additions & 0 deletions src/pymorize/units.py
siligam marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""
This module deals with the auto-unit conversion in the cmorization process.
In case the units in model files differ from CMIP Tables, this module attempts to
convert them automatically.

In case of missing units in either model files or CMIP Tables, this module
cannot convert from a dimensionless base to something with dimension. Such
cases have to be dealt with in the `action` section of the Rules module on a
per-variable basis.
"""

import re
from typing import Pattern, Union

import cf_xarray.units
import pint_xarray
import xarray as xr
from chemicals import periodic_table
from loguru import logger

# Module-wide unit registry taken from pint_xarray. ``handle_chemicals`` adds
# chemical-element unit definitions to it at runtime via ``ureg.define``.
ureg = pint_xarray.unit_registry


def handle_chemicals(
    s: Union[str, None] = None, pattern: Pattern = re.compile(r"mol(?P<symbol>\w+)")
):
    """Register a chemical-element unit found in ``s`` on the global ``ureg``.

    ``s`` is searched with ``pattern``. The text captured by the ``symbol``
    group is looked up as an attribute of ``periodic_table``. If the registry
    cannot already parse ``s``, the matched text (e.g. ``molC``) is defined as
    the element's ``MW`` in grams.

    Parameters
    ----------
    s : str or None
        Unit string to inspect. ``None`` is a no-op.
    pattern : Pattern
        Compiled regular expression providing a named group ``symbol``.

    Raises
    ------
    ValueError
        If the captured symbol is not an attribute of ``periodic_table``.
    """
    if s is None:
        return
    match = pattern.search(s)
    if match is None:
        # No "mol<symbol>" fragment in the unit string: nothing to register.
        return
    symbol = match.group("symbol")
    try:
        element = getattr(periodic_table, symbol)
    except AttributeError as err:
        # Build the message from the match object itself. Calling
        # ``groupdict()``/``group()`` on the already-extracted dict would raise
        # AttributeError and hide the intended ValueError.
        raise ValueError(
            f"Unknown chemical element {symbol} in {match.group()}"
        ) from err
    try:
        # Probe the registry first so an already-known unit is not redefined.
        ureg(s)
    except pint_xarray.pint.errors.UndefinedUnitError:
        logger.debug(f"Chemical element {element.name} detected in units {s}.")
        logger.debug(f"Registering definition: {match.group()} = {element.MW} * g")
        ureg.define(f"{match.group()} = {element.MW} * g")
siligam marked this conversation as resolved.
Show resolved Hide resolved


def handle_unit_conversion(
    da: xr.DataArray, unit: str, source_unit: Union[str, None] = None
) -> xr.DataArray:
    """Convert the data in ``da`` to ``unit`` and return the converted array.

    The source unit is read from ``da.attrs["units"]`` unless ``source_unit``
    is given, in which case ``source_unit`` is used instead. Both the source
    and the target unit strings are passed through ``handle_chemicals`` before
    the conversion.

    Parameters
    ----------
    da : xr.DataArray
        Array whose data is converted.
    unit : str
        Unit to convert the data to; also written to ``attrs["units"]`` of the
        returned array.
    source_unit : str or None
        Overrides the unit stored on ``da`` when not ``None``.

    Returns
    -------
    xr.DataArray
        The dequantified array after conversion.
    """
    from_unit = da.attrs.get("units")
    if source_unit is not None:
        logger.debug(
            f"using user defined unit ({source_unit}) instead of ({from_unit}) from DataArray "
        )
        from_unit = source_unit

    # Source first, then target: same order as the two explicit calls.
    for unit_string in (from_unit, unit):
        handle_chemicals(unit_string)

    quantified = da.pint.quantify(from_unit)
    converted = quantified.pint.to(unit)
    new_da = converted.pint.dequantify()

    logger.debug(f"setting units on DataArray: {unit}")
    new_da.attrs["units"] = unit
    return new_da
142 changes: 142 additions & 0 deletions tests/test_units.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import numpy as np
import pint
pgierz marked this conversation as resolved.
Show resolved Hide resolved
import pytest
import xarray as xr
from chemicals import periodic_table

from pymorize.units import handle_chemicals, handle_unit_conversion, ureg

# Sample unit strings that are found in CMIP6 tables and in fesom1 (recom).
# Used to parametrize ``test_can_read_units``, which passes each one to ``ureg``.
allunits = [
    "%",
    "0.001",
    "1",
    "1.e6 J m-1 s-1",
    "1e-06",
    "1e-3 kg m-2",
    "1e3 km3",
    "J m-2",
    "K",
    "K Pa s-1",
    "K s-1",
    "K2",
    "Pa2 s-2",
    "W m^-2",
    "W/m2",
    "W/m^2",
    "day",
    "degC",
    "degC kg m-2",
    "degC2",
    "degree",
    "degrees_east",
    "degrees_north",
    "kg kg-1",
    "kg m-2 s-1",
    "kg m-3",
    "kg s-1",
    "km-2 s-1",
    "m-1 sr-1",
    "m-2",
    "m^-3",
    "m^2",
    "mol/kg",
    "mol/m2",
    "mol m-2",
    "mol m^-2",
    "(mol/kg) / atm",
    "mmol/m2/d",
    "uatm",
    "year",
    "yr",
]


@pytest.mark.parametrize("test_input", allunits)
def test_can_read_units(test_input):
    """Each sample unit string is passed to the unit registry; parsing must not raise."""
    _ = ureg(test_input)


# Unit strings that carry a chemical element symbol directly after "mol"
# (C, N, Fe). Used to parametrize ``test_handle_chemicals``.
units_with_chemical_element = [
    "mmolC/(m2*d)",
    "mmolC/d",
    "mmolC/m2/d",
    "mmolN/(m2*d)",
    "mmolN/d",
    "umolFe/m2/s",
]


@pytest.mark.parametrize("test_input", units_with_chemical_element)
def test_handle_chemicals(test_input):
    """After ``handle_chemicals`` has seen a unit string, ``ureg`` can parse it."""
    # Register first, then parse: the order matters.
    for step in (handle_chemicals, ureg):
        step(test_input)


def test_can_handle_simple_chemical_elements():
    """Converting 10 ``molC`` to ``g`` gives ten times carbon's ``MW``."""
    from_unit = "molC"
    to_unit = "g"
    da = xr.DataArray(10, attrs={"units": from_unit})
    new_da = handle_unit_conversion(da, to_unit)
    expected = periodic_table.Carbon.MW * 10
    # The result is a floating-point product, so compare with a tolerance
    # instead of exact equality.
    assert np.isclose(new_da.data, expected)
    assert new_da.attrs["units"] == to_unit


def test_can_handle_chemical_elements():
    """Converting 10 ``mmolC/m2/d`` to ``kg m-2 s-1`` gives about 1.39012731e-09."""
    from_unit = "mmolC/m2/d"
    to_unit = "kg m-2 s-1"
    da = xr.DataArray(10, attrs={"units": from_unit})
    new_da = handle_unit_conversion(da, to_unit)
    expected = 1.39012731e-09
    # np.allclose defaults to atol=1e-08, which is larger than the expected
    # value itself, so a result of 0 would also pass. Use a purely relative
    # tolerance so the magnitude is actually checked.
    assert np.allclose(new_da.data, expected, rtol=1e-6, atol=0.0)
    assert new_da.attrs["units"] == to_unit


def test_user_defined_units_takes_precedence_over_units_in_dataarray():
    """An explicit ``source_unit`` is used instead of the unit stored on the array."""
    from_unit = "molC"
    to_unit = "g"
    da = xr.DataArray(10, attrs={"units": "kg"})
    # here, "molC" will be used instead of "kg"
    new_da = handle_unit_conversion(da, to_unit, from_unit)
    expected = periodic_table.Carbon.MW * 10
    # The result is a floating-point product, so compare with a tolerance
    # instead of exact equality.
    assert np.isclose(new_da.data, expected)
    assert new_da.attrs["units"] == to_unit


def test_without_defining_uraninum_to_weight_conversion_raises_error():
    """Parsing ``mmolU/m**2/d`` with ``ureg`` raises ``UndefinedUnitError``."""
    undefined_unit = "mmolU/m**2/d"
    expected_error = pint.errors.UndefinedUnitError
    with pytest.raises(expected_error):
        ureg(undefined_unit)


def test_recognizes_previous_defined_chemical_elements():
    """The registry reports ``mmolC/m^2/d`` as a known unit.

    NOTE(review): nothing in this test registers ``molC``; it presumably relies
    on an earlier test having done so -- confirm test-order independence.
    """
    is_known = "mmolC/m^2/d" in ureg
    assert is_known


def test_works_when_both_units_are_None():
    """With ``None`` as both stored and target unit, the result's units equal the target."""
    to_unit = None
    da = xr.DataArray(10, attrs=dict(units=None))
    result_units = handle_unit_conversion(da, to_unit).attrs["units"]
    assert result_units == to_unit


def test_works_when_both_units_are_empty_string():
    """With ``""`` as both stored and target unit, the result's units equal the target."""
    to_unit = ""
    da = xr.DataArray(10, attrs=dict(units=""))
    result_units = handle_unit_conversion(da, to_unit).attrs["units"]
    assert result_units == to_unit


@pytest.mark.parametrize("from_unit", ["m/s", None, ""])
def test_when_target_units_is_None_overrides_existing_units(from_unit):
    """A ``None`` target unit ends up on the result whatever unit the array carried."""
    to_unit = None
    da = xr.DataArray(10, attrs=dict(units=from_unit))
    result_units = handle_unit_conversion(da, to_unit).attrs["units"]
    assert result_units == to_unit


@pytest.mark.parametrize("from_unit", ["m/s", None])
def test_when_tartget_unit_is_empty_string_raises_error(from_unit):
    """An empty-string target unit raises ``ValueError`` for these source units."""
    to_unit = ""
    da = xr.DataArray(10, attrs=dict(units=from_unit))
    expected_error = ValueError
    with pytest.raises(expected_error):
        handle_unit_conversion(da, to_unit)
Loading