Skip to content

Commit

Permalink
Merge branch 'main' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
blimlim authored Sep 23, 2024
2 parents 0ee303a + 92096ce commit 99b89ea
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 33 deletions.
96 changes: 94 additions & 2 deletions test/test_um2netcdf.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import unittest.mock as mock
from dataclasses import dataclass
from collections import namedtuple
import numpy as np
from iris.exceptions import CoordinateNotFoundError
import operator

import umpost.um2netcdf as um2nc

Expand All @@ -13,6 +13,7 @@
import mule.ff
import iris.cube
import iris.coords
import iris.exceptions

D_LAT_N96 = 1.25 # Degrees between latitude points on N96 grid
D_LON_N96 = 1.875 # Degrees between longitude points on N96 grid
Expand Down Expand Up @@ -403,12 +404,14 @@ def __init__(self, item_code, var_name=None, attributes=None, units=None):
self.units = None or units
self.standard_name = None
self.long_name = None

self.data = None

def coord(self, _):
raise NotImplementedError(
"coord() method not implemented on DummyCube"
)


def name(self):
# mimic iris API
return self.var_name
Expand Down Expand Up @@ -1038,3 +1041,92 @@ def test_fix_cell_methods_keep_weeks():
assert mod.method == cm.method
assert mod.coord_names == cm.coord_names
assert mod.intervals[0] == "week"


@pytest.fixture
def level_heights():
# NB: sourced from z_sea_theta_data fixture. This "array" is cropped as
# fix_level_coords() only accesses height array[0]
return [20.0003377]

@pytest.fixture
def level_coords(level_heights):
return {um2nc.MODEL_LEVEL_NUM: iris.coords.DimCoord(range(1, 39)),
um2nc.LEVEL_HEIGHT: iris.coords.DimCoord(level_heights),
um2nc.SIGMA: iris.coords.AuxCoord(np.array([0.99771646]))}

@pytest.fixture
def get_fake_cube_coords(level_coords):

@dataclass
class FakeCubeCoords:
"""Test object to represent a cube with a coords() access function."""

def coord(self, key):
return level_coords[key]

# return class for instantiation in tests
return FakeCubeCoords


def test_fix_level_coord_modify_cube_with_rho(level_heights,
get_fake_cube_coords,
z_sea_rho_data,
z_sea_theta_data):
# verify cube renaming with appropriate z_rho data
cube = get_fake_cube_coords()
assert cube.coord(um2nc.MODEL_LEVEL_NUM).var_name is None
assert cube.coord(um2nc.LEVEL_HEIGHT).var_name is None
assert cube.coord(um2nc.SIGMA).var_name is None


rho = np.ones(z_sea_theta_data.shape) * level_heights[0]
um2nc.fix_level_coord(cube, rho, z_sea_theta_data)

assert cube.coord(um2nc.MODEL_LEVEL_NUM).var_name == "model_rho_level_number"
assert cube.coord(um2nc.LEVEL_HEIGHT).var_name == "rho_level_height"
assert cube.coord(um2nc.SIGMA).var_name == "sigma_rho"


def test_fix_level_coord_modify_cube_with_theta(level_heights,
get_fake_cube_coords,
z_sea_rho_data,
z_sea_theta_data):
# verify cube renaming with appropriate z_theta data
cube = get_fake_cube_coords()
um2nc.fix_level_coord(cube, z_sea_rho_data, z_sea_theta_data)

assert cube.coord(um2nc.MODEL_LEVEL_NUM).var_name == "model_theta_level_number"
assert cube.coord(um2nc.LEVEL_HEIGHT).var_name == "theta_level_height"
assert cube.coord(um2nc.SIGMA).var_name == "sigma_theta"


def test_fix_level_coord_skipped_if_no_levels(z_sea_rho_data, z_sea_theta_data):
# ensures level fixes are skipped if the cube lacks model levels, sigma etc
m_cube = mock.Mock(iris.cube.Cube)
m_cube.coord.side_effect = iris.exceptions.CoordinateNotFoundError
um2nc.fix_level_coord(m_cube, z_sea_rho_data, z_sea_theta_data)


# int64 to int32 data conversion tests
# NB: skip float64 to float32 overflow as float32 min/max is huge: -/+ 3.40e+38
@pytest.mark.parametrize("array,_operator,bound",
[([100, 10, 1, 0, -10], None, None),
([3000000000], operator.gt, np.iinfo(np.int32).max),
([-3000000000], operator.lt, np.iinfo(np.int32).min)])
def test_convert_32_bit(ua_plev_cube, array, _operator, bound):
ua_plev_cube.data = np.array(array, dtype=np.int64)
um2nc.convert_32_bit(ua_plev_cube)

if _operator:
assert _operator(array[0], bound)

assert ua_plev_cube.data.dtype == np.int32


# test float conversion separately, otherwise parametrize block is ugly
def test_convert_32_bit_with_float64(ua_plev_cube):
array = np.array([300.33, 30.456, 3.04, 0.0, -30.667], dtype=np.float64)
ua_plev_cube.data = array
um2nc.convert_32_bit(ua_plev_cube)
assert ua_plev_cube.data.dtype == np.float32
108 changes: 77 additions & 31 deletions umpost/um2netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@
4: 'NETCDF4_CLASSIC'
}

MODEL_LEVEL_NUM = "model_level_number"
LEVEL_HEIGHT = "level_height"
SIGMA = "sigma"


class PostProcessingError(Exception):
"""Generic class for um2nc specific errors."""
Expand Down Expand Up @@ -315,33 +319,6 @@ def fix_latlon_coords(cube, grid_type, dlat, dlon):
fix_lon_coord_name(longitude_coordinate, grid_type, dlon)


# TODO: refactor to "rename level coord"
# TODO: move this to func renaming section?
def fix_level_coord(cube, z_rho, z_theta):
# Rename model_level_number coordinates to better distinguish rho and theta levels
try:
c_lev = cube.coord('model_level_number')
c_height = cube.coord('level_height')
c_sigma = cube.coord('sigma')
except iris.exceptions.CoordinateNotFoundError:
c_lev = None
c_height = None
c_sigma = None

if c_lev:
d_rho = abs(c_height.points[0]-z_rho)
if d_rho.min() < 1e-6:
c_lev.var_name = 'model_rho_level_number'
c_height.var_name = 'rho_level_height'
c_sigma.var_name = 'sigma_rho'
else:
d_theta = abs(c_height.points[0]-z_theta)
if d_theta.min() < 1e-6:
c_lev.var_name = 'model_theta_level_number'
c_height.var_name = 'theta_level_height'
c_sigma.var_name = 'sigma_theta'


# TODO: split cube ops into functions, this will likely increase process() workflow steps
def cubewrite(cube, sman, compression, use64bit, verbose):
try:
Expand All @@ -356,11 +333,9 @@ def cubewrite(cube, sman, compression, use64bit, verbose):
except iris.exceptions.CoordinateNotFoundError:
pass

# TODO: flag warnings as an error for the driver script?
if not use64bit:
if cube.data.dtype == 'float64':
cube.data = cube.data.astype(np.float32)
elif cube.data.dtype == 'int64':
cube.data = cube.data.astype(np.int32)
convert_32_bit(cube)

# Set the missing_value attribute. Use an array to force the type to match
# the data type
Expand Down Expand Up @@ -890,6 +865,77 @@ def fix_units(cube, um_var_units, verbose: bool):
cube.units = um_var_units


def fix_level_coord(cube, z_rho, z_theta, tol=1e-6):
"""
Renames model_level_number coordinates to help distinguish rho/theta levels.
Cubes without 'model_level_number' coordinates are skipped.
Parameters
----------
cube : iris.cube.Cube object for in place modification
z_rho : geopotential height of the sea free surface
z_theta : density (rho) of the air at sea level
tol : height tolerance
"""
# TODO: this is called once per cube and many lack the model_level_number
# coord. Is a potential optimisation possible from pre-specifying a
# list of cubes with model_level_numbers & only processing these?
try:
c_lev = cube.coord(MODEL_LEVEL_NUM)
c_height = cube.coord(LEVEL_HEIGHT)
c_sigma = cube.coord(SIGMA)
except iris.exceptions.CoordinateNotFoundError:
return

if c_lev:
d_rho = abs(c_height.points[0]-z_rho)
if d_rho.min() < tol:
c_lev.var_name = 'model_rho_level_number'
c_height.var_name = 'rho_level_height'
c_sigma.var_name = 'sigma_rho'
else:
d_theta = abs(c_height.points[0]-z_theta)
if d_theta.min() < tol:
c_lev.var_name = 'model_theta_level_number'
c_height.var_name = 'theta_level_height'
c_sigma.var_name = 'sigma_theta'


MAX_NP_INT32 = np.iinfo(np.int32).max
MIN_NP_INT32 = np.iinfo(np.int32).min


def convert_32_bit(cube):
"""
Convert 64 bit int/float data to 32 bit (in place).
Parameters
----------
cube : iris.cube object to modify.
Warns
-----
RuntimeWarning : if the cube has data over 32-bit limits, causing an overflow.
"""
if cube.data.dtype == 'float64':
cube.data = cube.data.astype(np.float32)
elif cube.data.dtype == 'int64':
_max = np.max(cube.data)
_min = np.min(cube.data)

msg = (f"32 bit under/overflow converting {cube.var_name}! Output data "
f"likely invalid. Use '--64' option to retain data integrity.")

if _max > MAX_NP_INT32:
warnings.warn(msg, category=RuntimeWarning)

if _min < MIN_NP_INT32:
warnings.warn(msg, category=RuntimeWarning)

cube.data = cube.data.astype(np.int32)


def parse_args():
parser = argparse.ArgumentParser(description="Convert UM fieldsfile to netcdf")
parser.add_argument('-k', dest='nckind', required=False, type=int,
Expand Down

0 comments on commit 99b89ea

Please sign in to comment.