Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract cubewrite fill value modifications #114

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
67 changes: 67 additions & 0 deletions test/test_um2netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import iris.coords
import iris.exceptions

from netCDF4 import default_fillvals

D_LAT_N96 = 1.25 # Degrees between latitude points on N96 grid
D_LON_N96 = 1.875 # Degrees between longitude points on N96 grid

Expand Down Expand Up @@ -1207,3 +1209,68 @@ def test_convert_32_bit_with_float64(ua_plev_cube):
ua_plev_cube.data = array
um2nc.convert_32_bit(ua_plev_cube)
assert ua_plev_cube.data.dtype == np.float32


@pytest.mark.parametrize(
    "cube_data, expected_fill_val",
    [
        (np.array([1.1, 2.1], dtype="float32"),
         np.float32(um2nc.DEFAULT_FILL_VAL_FLOAT)),
        (np.array([1.1, 2.1], dtype="float64"),
         np.float64(um2nc.DEFAULT_FILL_VAL_FLOAT)),
        (np.array([1.1, 2.1], dtype="complex64"),
         np.complex64(default_fillvals["c8"])),
        (np.array([1, 2], dtype="int32"),
         np.int32(default_fillvals["i4"])),
        (np.array([1, 2], dtype="int64"),
         np.int64(default_fillvals["i8"]))
    ]
)
def test_fix_fill_value_defaults(cube_data, expected_fill_val):
    """
    Check that correct default fill values are found based
    on a cube's data's type.
    """
    fake_cube = DummyCube(12345, "fake_var", attributes={})
    fake_cube.data = cube_data

    fill_value = um2nc.fix_fill_value(fake_cube)

    assert fill_value == expected_fill_val
    # Compare dtypes rather than `type(...) == ...` (flake8 E721): the
    # returned fill value is a numpy scalar, so its dtype must match the
    # dtype of the cube's data.
    assert fill_value.dtype == cube_data.dtype

    # Check that missing value attribute set to expected fill_value
    assert fake_cube.attributes["missing_value"][0] == expected_fill_val


def test_fix_fill_value_custom():
    """
    Check that a user-supplied fill value takes precedence over the
    type-based defaults.
    """
    cube = DummyCube(12345, "fake_var", attributes={})
    cube.data = np.array([1.1, 2.2], dtype="float64")
    override = np.float64(123.456)

    result = um2nc.fix_fill_value(cube, custom_fill_value=override)

    # The custom value must be returned ...
    assert result == override
    # ... and also written into the cube's missing_value attribute.
    assert cube.attributes["missing_value"][0] == override


def test_fix_fill_value_wrong_type():
    """
    Check that an error is raised when supplied fill value's
    type does not match the cube's data's type.
    """
    cube = DummyCube(12345, "fake_var", attributes={})
    cube.data = np.array([1, 2, 3], dtype="int32")
    mismatched = np.float32(151.11)

    # Sanity check: the chosen fill value deliberately mismatches the data.
    assert cube.data.dtype != mismatched.dtype

    with pytest.raises(TypeError):
        um2nc.fix_fill_value(cube, custom_fill_value=mismatched)
72 changes: 63 additions & 9 deletions umpost/um2netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@
LEVEL_HEIGHT = "level_height"
SIGMA = "sigma"

DEFAULT_FILL_VAL_FLOAT = 1.e20


class PostProcessingError(Exception):
"""Generic class for um2nc specific errors."""
Expand Down Expand Up @@ -319,6 +321,66 @@ def fix_latlon_coords(cube, grid_type, dlat, dlon):
fix_lon_coord_name(longitude_coordinate, grid_type, dlon)


def get_default_fill_value(cube):
    """
    Get the default fill value for a cube's data's type.

    Floating point data uses the post-processing default
    (DEFAULT_FILL_VAL_FLOAT); all other kinds use the netCDF4 library
    defaults, keyed by dtype kind and item size (e.g. "i4", "c8").

    Parameters
    ----------
    cube: Iris cube object.

    Returns
    -------
    fill_value: numpy scalar with type matching cube.data
    """
    if cube.data.dtype.kind == 'f':
        fill_value = DEFAULT_FILL_VAL_FLOAT
    else:
        # netCDF4 keys its default fill values by "<kind><itemsize>".
        key = f"{cube.data.dtype.kind:s}{cube.data.dtype.itemsize:1d}"
        fill_value = default_fillvals[key]

    # NB: the `_FillValue` attribute appears to be converted to match the
    # cube data's type externally (likely in the netCDF4 library). It's not
    # strictly necessary to do the conversion here. However, we need
    # the separate `missing_value` attribute to have the correct type
    # and so it's cleaner to set the type for both here.

    # `dtype.type` is the dtype's numpy scalar constructor: it converts the
    # value directly, without the temporary one-element array round-trip.
    return cube.data.dtype.type(fill_value)


def fix_fill_value(cube, custom_fill_value=None):
    """
    Set a cube's missing_value attribute.

    Parameters
    ----------
    cube: Iris cube object (modified in place).
    custom_fill_value: Fill value to use in place of defaults.
        Type must match cube data's type.

    Returns
    -------
    fill_value: the value written into the cube's `missing_value`
        attribute (a numpy scalar matching cube.data's dtype).

    Raises
    ------
    TypeError: if the fill value's type does not match the cube
        data's type.
    """
    if custom_fill_value is not None:
        fill_value = custom_fill_value
    else:
        fill_value = get_default_fill_value(cube)

    # Compare dtypes via np.dtype() rather than `type(...) == ...`
    # (flake8 E721). np.dtype() maps both numpy scalar types and plain
    # Python types (e.g. float -> float64) onto dtypes, matching the
    # behaviour of the original type-equality check.
    try:
        fill_dtype = np.dtype(type(fill_value))
    except TypeError:
        # Type cannot be mapped to a dtype: treat as a mismatch below.
        fill_dtype = None

    if fill_dtype != cube.data.dtype:
        msg = (f"fill_val type {type(fill_value)} does not "
               f"match cube {cube.name()} data type {cube.data.dtype}.")
        raise TypeError(msg)

    # Use an array to force the stored attribute's type to match the
    # data type (also keeps the separate `missing_value` attribute
    # consistent with `_FillValue`).
    cube.attributes['missing_value'] = np.array([fill_value],
                                                cube.data.dtype)
    return fill_value


# TODO: split cube ops into functions, this will likely increase process() workflow steps
def cubewrite(cube, sman, compression, use64bit, verbose):
# TODO: move into process() AND if a new cube is returned, swap into filtered cube list
Expand All @@ -328,15 +390,7 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
if not use64bit:
convert_32_bit(cube)

# Set the missing_value attribute. Use an array to force the type to match
# the data type
if cube.data.dtype.kind == 'f':
fill_value = 1.e20
else:
# Use netCDF defaults
fill_value = default_fillvals['%s%1d' % (cube.data.dtype.kind, cube.data.dtype.itemsize)]

cube.attributes['missing_value'] = np.array([fill_value], cube.data.dtype)
fill_value = fix_fill_value(cube)

# If reference date is before 1600 use proleptic gregorian
# calendar and change units from hours to days
Expand Down
Loading