Feat: move netcdf_io utility from DAS (#42)
* add netcdf write functions
* add netcdf libraries to linter and pytest GitHub actions
mackenzie-grimes-noaa authored Dec 15, 2023
1 parent 7242857 commit 1b8a18c
Showing 5 changed files with 252 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/linter.yml
@@ -24,7 +24,7 @@ jobs:
- name: Install python dependencies
run: |
python -m pip install --upgrade pip
pip install pytest pylint==2.17.5 python-dateutil==2.8.2 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj numpy==1.26.2 shapely==2.0.2
pip install pytest pylint==2.17.5 python-dateutil==2.8.2 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj numpy==1.26.2 shapely==2.0.2 netcdf4==1.6.3 h5netcdf==1.1.0
- name: Set PYTHONPATH for pylint
run: |
2 changes: 1 addition & 1 deletion .github/workflows/run-tests.yml
@@ -23,7 +23,7 @@ jobs:
- name: Install python dependencies
run: |
python -m pip install --upgrade pip
pip install pytest pylint==2.17.5 python-dateutil==2.8.2 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj numpy==1.26.2 shapely==2.0.2 pytest-cov==4.1.0
pip install pytest pylint==2.17.5 python-dateutil==2.8.2 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj numpy==1.26.2 shapely==2.0.2 pytest-cov==4.1.0 netcdf4==1.6.3 h5netcdf==1.1.0
- name: Set PYTHONPATH for pytest
run: |
133 changes: 133 additions & 0 deletions python/idsse_common/idsse/common/netcdf_io.py
@@ -0,0 +1,133 @@
"""Utilities for reading NetCDF files"""
# ----------------------------------------------------------------------------------
# Created on Mon Feb 13 2023
#
# Copyright (c) 2023 Regents of the University of Colorado. All rights reserved. (1)
# Copyright (c) 2023 Colorado State University. All rights reserved. (2)
#
# Contributors:
# Geary J Layne (1)
# Mackenzie Grimes (2)
#
# ----------------------------------------------------------------------------------

import logging
import os
from typing import List, Protocol, Tuple

from netCDF4 import Dataset # pylint: disable=no-name-in-module
import h5netcdf as h5nc
import numpy as np

logger = logging.getLogger(__name__)


# cSpell:ignore ncattrs, getncattr, maskandscale
class HasNcAttr(Protocol):
"""Protocol that allows retrieving attributes"""
def ncattrs(self) -> List[str]:
"""Gives access to list of keys
Returns:
List[str]: Key names for the attributes
"""

def getncattr(self, key: str) -> any:
"""Gives access to value for specific key
Args:
key (str): Name of attribute to be retrieved
Returns:
any: The requested attribute, of unknown type
"""


def read_netcdf_global_attrs(filepath: str) -> dict:
"""Read the global attributes from a Netcdf file
Args:
filepath (str): Path to Netcdf file
Returns:
dict: Global attributes as dictionary
"""
return _read_attrs(Dataset(filepath))


def read_netcdf(filepath: str, use_h5_lib: bool = False) -> Tuple[dict, np.ndarray]:
"""Read a DAS Netcdf file.
Args:
filepath (str): Path to DAS Netcdf file
use_h5_lib (bool): if True, use the h5netcdf library for file I/O;
if False, use the netCDF4 library. Defaults to False.
Returns:
Tuple[dict, np.ndarray]: Global attributes and data
"""
if use_h5_lib:
with h5nc.File(filepath, 'r') as nc_file:
grid = nc_file.variables['grid'][:]
return nc_file.attrs, grid

# otherwise, use netcdf4 library (default)
with Dataset(filepath) as dataset:
dataset.set_auto_maskandscale(False)
grid = dataset.variables['grid'][:]

global_attrs = _read_attrs(dataset)
return global_attrs, grid


def write_netcdf(attrs: dict, grid: np.ndarray, filepath: str, use_h5_lib: bool = False) -> str:
"""Store data and attributes to a Netcdf4 file
Args:
attrs (dict): Attributes describing the data to be written
grid (np.ndarray): Numpy array of data
filepath (str): String representation of where to write the file
use_h5_lib (bool): if True, use the h5netcdf library for file I/O;
if False, use the netCDF4 library. Defaults to False.
Returns:
str: The location that data was written to
"""
_make_dirs(filepath)
logger.debug('Writing data to: %s', filepath)

if use_h5_lib:
with h5nc.File(filepath, 'w') as file:
y_dimensions, x_dimensions = grid.shape
# set dimensions with a dictionary
file.dimensions = {'x': x_dimensions, 'y': y_dimensions}

grid_var = file.create_variable('grid', ('y', 'x'), 'f4')
grid_var[:] = grid

for key, value in attrs.items():
file.attrs[key] = value

else:
# otherwise, write file using netCDF4 library (default)
with Dataset(filepath, 'w', format='NETCDF4') as dataset:
y_dimensions, x_dimensions = grid.shape
dataset.createDimension('x', x_dimensions)
dataset.createDimension('y', y_dimensions)

grid_var = dataset.createVariable('grid', 'f4', ('y', 'x'))
grid_var[:] = grid

for key, value in attrs.items():
setattr(dataset, key, str(value))

return filepath


def _make_dirs(filename: str):
dirname = os.path.dirname(os.path.abspath(filename))
os.makedirs(dirname, exist_ok=True)


def _read_attrs(has_nc_attr: HasNcAttr) -> dict:
return {key: has_nc_attr.getncattr(key) for key in has_nc_attr.ncattrs()}
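A minimal usage sketch of the new module; the attribute values, grid shape, and output path below are hypothetical. write_netcdf creates any missing parent directories, stores the data under the variable name 'grid', and returns the path it wrote to.

import numpy as np

from idsse.common.netcdf_io import read_netcdf, write_netcdf

# hypothetical attributes and data grid
attrs = {'product': 'EXAMPLE.PRODUCT', 'units': 'Fahrenheit'}
grid = np.zeros((10, 20), dtype='f4')

# the default path uses the netCDF4 library for file I/O
written_path = write_netcdf(attrs, grid, '/tmp/example_grid.nc')
new_attrs, new_grid = read_netcdf(written_path)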
Binary file not shown.
117 changes: 117 additions & 0 deletions python/idsse_common/test/test_netcdf_io.py
@@ -0,0 +1,117 @@
"""Test suite for netcdf_io.py"""
# --------------------------------------------------------------------------------
# Created on Mon May 1 2023
#
# Copyright (c) 2023 Regents of the University of Colorado. All rights reserved.
#
# Contributors:
# Geary J Layne
#
# --------------------------------------------------------------------------------
# pylint: disable=missing-function-docstring,redefined-outer-name,protected-access,unused-argument

import os
from typing import Dict, Tuple

from pytest import fixture, approx
from numpy import ndarray

from idsse.common.netcdf_io import read_netcdf, read_netcdf_global_attrs, write_netcdf


# test data
EXAMPLE_NETCDF_FILEPATH = os.path.join(
os.path.dirname(__file__),
'resources',
'gridstore55657865.nc'
)

EXAMPLE_ATTRIBUTES = {
'product': 'NBM.AWS.GRIB',
'field': 'TEMP',
'valid_dt': '2022-11-11 17:00:00+00:00',
'issue_dt': '2022-11-11 14:00:00+00:00',
'task': 'data_task',
'region': 'CO',
'units': 'Fahrenheit',
'proj_name': 'NBM',
'proj_spec': '+proj=lcc +lat_0=25.0 +lon_0=-95.0 +lat_1=25.0 +r=6371200',
'grid_spec': '+dx=2539.703 +dy=2539.703 +w=2345 +h=1597 +lat_ll=19.229 +lon_ll=-126.2766',
'data_key': 'NBM.AWS.GRIB:CO:TEMP::Fahrenheit::20221111140000.20221111170000'
}

EXAMPLE_PROD_KEY = (
'product:NBM.AWS.GRIB-field:TEMP-issue:20221111140000-valid:20221112000000-units:Fahrenheit'
)


# pytest fixtures
@fixture
def example_netcdf_data() -> Tuple[Dict[str, any], ndarray]:
return read_netcdf(EXAMPLE_NETCDF_FILEPATH)


# tests
def test_read_netcdf_global_attrs():
attrs = read_netcdf_global_attrs(EXAMPLE_NETCDF_FILEPATH)

assert len(attrs) == 11
assert attrs == EXAMPLE_ATTRIBUTES


def test_read_netcdf(example_netcdf_data: Tuple[Dict[str, any], ndarray]):
attrs, grid = example_netcdf_data

assert grid.shape == (1597, 2345)
y_dimensions, x_dimensions = grid.shape

assert grid[0][0] == approx(72.98599)
assert grid[round(y_dimensions / 2)][round(x_dimensions / 2)] == approx(12.505991)
assert grid[y_dimensions - 1][x_dimensions - 1] == approx(2.4259913)

assert attrs == EXAMPLE_ATTRIBUTES


def test_read_and_write_netcdf(example_netcdf_data: Tuple[Dict[str, any], ndarray]):
# cleanup existing test file if needed
temp_netcdf_filepath = './tmp/test_netcdf_file.nc'
if os.path.exists(temp_netcdf_filepath):
os.remove(temp_netcdf_filepath)

attrs, grid = example_netcdf_data

# verify write_netcdf functionality
attrs['prodKey'] = EXAMPLE_PROD_KEY
attrs['prodSource'] = attrs['product']
written_filepath = write_netcdf(attrs, grid, temp_netcdf_filepath)
assert written_filepath == temp_netcdf_filepath
assert os.path.exists(temp_netcdf_filepath)

new_file_attrs, new_file_grid = read_netcdf(written_filepath)
assert new_file_attrs == attrs
assert new_file_grid[123][321] == grid[123][321]

# cleanup created netcdf file
os.remove(temp_netcdf_filepath)


def test_read_and_write_netcdf_with_h5nc(example_netcdf_data: Tuple[Dict[str, any], ndarray]):
# create h5nc file
temp_netcdf_h5_filepath = './tmp/test_netcdf_h5_file.nc'
if os.path.exists(temp_netcdf_h5_filepath):
os.remove(temp_netcdf_h5_filepath)

attrs, grid = example_netcdf_data

# verify write_netcdf_with_h5nc functionality
attrs['prodKey'] = EXAMPLE_PROD_KEY
attrs['prodSource'] = attrs['product']
written_filepath = write_netcdf(attrs, grid, temp_netcdf_h5_filepath, use_h5_lib=True)
assert written_filepath == temp_netcdf_h5_filepath

# Don't verify h5 attrs for now; they are a custom h5py type and aren't easy to access
_, new_file_grid = read_netcdf(written_filepath, use_h5_lib=True)
assert new_file_grid[123][321] == grid[123][321]

# cleanup created netcdf h5 file
os.remove(temp_netcdf_h5_filepath)
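The same round trip can go through h5netcdf instead; a minimal sketch with hypothetical attribute values and output path, mirroring the last test above:

import numpy as np

from idsse.common.netcdf_io import read_netcdf, write_netcdf

attrs = {'product': 'EXAMPLE.PRODUCT', 'units': 'Fahrenheit'}
grid = np.ones((10, 20), dtype='f4')

# write and read back using the h5netcdf library rather than netCDF4
path = write_netcdf(attrs, grid, '/tmp/example_h5_grid.nc', use_h5_lib=True)
h5_attrs, h5_grid = read_netcdf(path, use_h5_lib=True)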
