Merge branch 'main' into develop

ACCESS-NRI · Sep 23, 2024 · 99b89ea · 99b89ea
2 parents 0ee303a + 92096ce
commit 99b89ea
Show file tree

Hide file tree

Showing 2 changed files with 171 additions and 33 deletions.
diff --git a/test/test_um2netcdf.py b/test/test_um2netcdf.py
@@ -1,8 +1,8 @@
 import unittest.mock as mock
 from dataclasses import dataclass
 from collections import namedtuple
-import numpy as np
 from iris.exceptions import CoordinateNotFoundError
+import operator
 
 import umpost.um2netcdf as um2nc
 
@@ -13,6 +13,7 @@
 import mule.ff
 import iris.cube
 import iris.coords
+import iris.exceptions
 
 D_LAT_N96 = 1.25   # Degrees between latitude points on N96 grid
 D_LON_N96 = 1.875  # Degrees between longitude points on N96 grid
@@ -403,12 +404,14 @@ def __init__(self, item_code, var_name=None, attributes=None, units=None):
         self.units = None or units
         self.standard_name = None
         self.long_name = None
-
+        self.data = None
+
     def coord(self, _):
         raise NotImplementedError(
             "coord() method not implemented on DummyCube"
         )
 
+
     def name(self):
         # mimic iris API
         return self.var_name
@@ -1038,3 +1041,92 @@ def test_fix_cell_methods_keep_weeks():
     assert mod.method == cm.method
     assert mod.coord_names == cm.coord_names
     assert mod.intervals[0] == "week"
+
+
+@pytest.fixture
+def level_heights():
+    # NB: sourced from z_sea_theta_data fixture. This "array" is cropped as
+    #     fix_level_coords() only accesses height array[0]
+    return [20.0003377]
+
+@pytest.fixture
+def level_coords(level_heights):
+    return {um2nc.MODEL_LEVEL_NUM: iris.coords.DimCoord(range(1, 39)),
+            um2nc.LEVEL_HEIGHT: iris.coords.DimCoord(level_heights),
+            um2nc.SIGMA: iris.coords.AuxCoord(np.array([0.99771646]))}
+
+@pytest.fixture
+def get_fake_cube_coords(level_coords):
+
+    @dataclass
+    class FakeCubeCoords:
+        """Test object to represent a cube with a coords() access function."""
+
+        def coord(self, key):
+            return level_coords[key]
+
+    # return class for instantiation in tests
+    return FakeCubeCoords
+
+
+def test_fix_level_coord_modify_cube_with_rho(level_heights,
+                                              get_fake_cube_coords,
+                                              z_sea_rho_data,
+                                              z_sea_theta_data):
+    # verify cube renaming with appropriate z_rho data
+    cube = get_fake_cube_coords()
+    assert cube.coord(um2nc.MODEL_LEVEL_NUM).var_name is None
+    assert cube.coord(um2nc.LEVEL_HEIGHT).var_name is None
+    assert cube.coord(um2nc.SIGMA).var_name is None
+
+
+    rho = np.ones(z_sea_theta_data.shape) * level_heights[0]
+    um2nc.fix_level_coord(cube, rho, z_sea_theta_data)
+
+    assert cube.coord(um2nc.MODEL_LEVEL_NUM).var_name == "model_rho_level_number"
+    assert cube.coord(um2nc.LEVEL_HEIGHT).var_name == "rho_level_height"
+    assert cube.coord(um2nc.SIGMA).var_name == "sigma_rho"
+
+
+def test_fix_level_coord_modify_cube_with_theta(level_heights,
+                                                get_fake_cube_coords,
+                                                z_sea_rho_data,
+                                                z_sea_theta_data):
+    # verify cube renaming with appropriate z_theta data
+    cube = get_fake_cube_coords()
+    um2nc.fix_level_coord(cube, z_sea_rho_data, z_sea_theta_data)
+
+    assert cube.coord(um2nc.MODEL_LEVEL_NUM).var_name == "model_theta_level_number"
+    assert cube.coord(um2nc.LEVEL_HEIGHT).var_name == "theta_level_height"
+    assert cube.coord(um2nc.SIGMA).var_name == "sigma_theta"
+
+
+def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data):
+    # ensures level fixes are skipped if the cube lacks model levels, sigma etc
+    m_cube = mock.Mock(iris.cube.Cube)
+    m_cube.coord.side_effect = iris.exceptions.CoordinateNotFoundError
+    um2nc.fix_level_coord(m_cube, z_sea_rho_data, z_sea_theta_data)
+
+
+# int64 to int32 data conversion tests
+# NB: skip float64 to float32 overflow as float32 min/max is huge: -/+ 3.40e+38
+@pytest.mark.parametrize("array,_operator,bound",
+                         [([100, 10, 1, 0, -10], None, None),
+                          ([3000000000], operator.gt, np.iinfo(np.int32).max),
+                          ([-3000000000], operator.lt, np.iinfo(np.int32).min)])
+def test_convert_32_bit(ua_plev_cube, array, _operator, bound):
+    ua_plev_cube.data = np.array(array, dtype=np.int64)
+    um2nc.convert_32_bit(ua_plev_cube)
+
+    if _operator:
+        assert _operator(array[0], bound)
+
+    assert ua_plev_cube.data.dtype == np.int32
+
+
+# test float conversion separately, otherwise parametrize block is ugly
+def test_convert_32_bit_with_float64(ua_plev_cube):
+    array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64)
+    ua_plev_cube.data = array
+    um2nc.convert_32_bit(ua_plev_cube)
+    assert ua_plev_cube.data.dtype == np.float32
diff --git a/umpost/um2netcdf.py b/umpost/um2netcdf.py
@@ -66,6 +66,10 @@
     4: 'NETCDF4_CLASSIC'
 }
 
+MODEL_LEVEL_NUM = "model_level_number"
+LEVEL_HEIGHT = "level_height"
+SIGMA = "sigma"
+
 
 class PostProcessingError(Exception):
     """Generic class for um2nc specific errors."""
@@ -315,33 +319,6 @@ def fix_latlon_coords(cube, grid_type, dlat, dlon):
     fix_lon_coord_name(longitude_coordinate, grid_type, dlon)
 
 
-# TODO: refactor to "rename level coord"
-# TODO: move this to func renaming section?
-def fix_level_coord(cube, z_rho, z_theta):
-    # Rename model_level_number coordinates to better distinguish rho and theta levels
-    try:
-        c_lev = cube.coord('model_level_number')
-        c_height = cube.coord('level_height')
-        c_sigma = cube.coord('sigma')
-    except iris.exceptions.CoordinateNotFoundError:
-        c_lev = None
-        c_height = None
-        c_sigma = None
-
-    if c_lev:
-        d_rho = abs(c_height.points[0]-z_rho)
-        if d_rho.min() < 1e-6:
-            c_lev.var_name = 'model_rho_level_number'
-            c_height.var_name = 'rho_level_height'
-            c_sigma.var_name = 'sigma_rho'
-        else:
-            d_theta = abs(c_height.points[0]-z_theta)
-            if d_theta.min() < 1e-6:
-                c_lev.var_name = 'model_theta_level_number'
-                c_height.var_name = 'theta_level_height'
-                c_sigma.var_name = 'sigma_theta'
-
-
 # TODO: split cube ops into functions, this will likely increase process() workflow steps
 def cubewrite(cube, sman, compression, use64bit, verbose):
     try:
@@ -356,11 +333,9 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
     except iris.exceptions.CoordinateNotFoundError:
         pass
 
+    # TODO: flag warnings as an error for the driver script?
     if not use64bit:
-        if cube.data.dtype == 'float64':
-            cube.data = cube.data.astype(np.float32)
-        elif cube.data.dtype == 'int64':
-            cube.data = cube.data.astype(np.int32)
+        convert_32_bit(cube)
 
     # Set the missing_value attribute. Use an array to force the type to match
     # the data type
@@ -890,6 +865,77 @@ def fix_units(cube, um_var_units, verbose: bool):
             cube.units = um_var_units
 
 
+def fix_level_coord(cube, z_rho, z_theta, tol=1e-6):
+    """
+    Renames model_level_number coordinates to help distinguish rho/theta levels.
+
+    Cubes without 'model_level_number' coordinates are skipped.
+
+    Parameters
+    ----------
+    cube : iris.cube.Cube object for in place modification
+    z_rho : geopotential height of the sea free surface
+    z_theta : density (rho) of the air at sea level
+    tol : height tolerance
+    """
+    # TODO: this is called once per cube and many lack the model_level_number
+    #       coord. Is a potential optimisation possible from pre-specifying a
+    #       list of cubes with model_level_numbers & only processing these?
+    try:
+        c_lev = cube.coord(MODEL_LEVEL_NUM)
+        c_height = cube.coord(LEVEL_HEIGHT)
+        c_sigma = cube.coord(SIGMA)
+    except iris.exceptions.CoordinateNotFoundError:
+        return
+
+    if c_lev:
+        d_rho = abs(c_height.points[0]-z_rho)
+        if d_rho.min() < tol:
+            c_lev.var_name = 'model_rho_level_number'
+            c_height.var_name = 'rho_level_height'
+            c_sigma.var_name = 'sigma_rho'
+        else:
+            d_theta = abs(c_height.points[0]-z_theta)
+            if d_theta.min() < tol:
+                c_lev.var_name = 'model_theta_level_number'
+                c_height.var_name = 'theta_level_height'
+                c_sigma.var_name = 'sigma_theta'
+
+
+MAX_NP_INT32 = np.iinfo(np.int32).max
+MIN_NP_INT32 = np.iinfo(np.int32).min
+
+
+def convert_32_bit(cube):
+    """
+    Convert 64 bit int/float data to 32 bit (in place).
+
+    Parameters
+    ----------
+    cube : iris.cube object to modify.
+
+    Warns
+    -----
+    RuntimeWarning : if the cube has data over 32-bit limits, causing an overflow.
+    """
+    if cube.data.dtype == 'float64':
+        cube.data = cube.data.astype(np.float32)
+    elif cube.data.dtype == 'int64':
+        _max = np.max(cube.data)
+        _min = np.min(cube.data)
+
+        msg = (f"32 bit under/overflow converting {cube.var_name}! Output data "
+               f"likely invalid. Use '--64' option to retain data integrity.")
+
+        if _max > MAX_NP_INT32:
+            warnings.warn(msg, category=RuntimeWarning)
+
+        if _min < MIN_NP_INT32:
+            warnings.warn(msg, category=RuntimeWarning)
+
+        cube.data = cube.data.astype(np.int32)
+
+
 def parse_args():
     parser = argparse.ArgumentParser(description="Convert UM fieldsfile to netcdf")
     parser.add_argument('-k', dest='nckind', required=False, type=int,