diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index d9a0df34..ebdd3c18 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -24,7 +24,7 @@ jobs: - name: Install python dependencies run: | python -m pip install --upgrade pip - pip install pytest pylint==2.17.5 python-dateutil==2.8.2 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj numpy==1.26.2 shapely==2.0.2 + pip install pytest pylint==2.17.5 python-dateutil==2.8.2 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj numpy==1.26.2 shapely==2.0.2 netcdf4==1.6.3 h5netcdf==1.1.0 - name: Set PYTHONPATH for pylint run: | diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 60074a47..773bd4d9 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -23,7 +23,7 @@ jobs: - name: Install python dependencies run: | python -m pip install --upgrade pip - pip install pytest pylint==2.17.5 python-dateutil==2.8.2 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj numpy==1.26.2 shapely==2.0.2 pytest-cov==4.1.0 + pip install pytest pylint==2.17.5 python-dateutil==2.8.2 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj numpy==1.26.2 shapely==2.0.2 pytest-cov==4.1.0 netcdf4==1.6.3 h5netcdf==1.1.0 - name: Set PYTHONPATH for pytest run: | diff --git a/python/idsse_common/idsse/common/netcdf_io.py b/python/idsse_common/idsse/common/netcdf_io.py new file mode 100644 index 00000000..a6aaa677 --- /dev/null +++ b/python/idsse_common/idsse/common/netcdf_io.py @@ -0,0 +1,133 @@ +"""Utilities for reading NetCDF files""" +# ---------------------------------------------------------------------------------- +# Created on Mon Feb 13 2023 +# +# Copyright (c) 2023 Regents of the University of Colorado. All rights reserved. (1) +# Copyright (c) 2023 Colorado State University. All rights reserved. 
(2) +# +# Contributors: +# Geary J Layne (1) +# Mackenzie Grimes (2) +# +# ---------------------------------------------------------------------------------- + +import logging +import os +from typing import List, Protocol, Tuple + +from netCDF4 import Dataset # pylint: disable=no-name-in-module +import h5netcdf as h5nc +import numpy as np + +logger = logging.getLogger(__name__) + + +# cSpell:ignore ncattrs, getncattr, maskandscale +class HasNcAttr(Protocol): + """Protocol that allows retrieving attributes""" + def ncattrs(self) -> List[str]: + """Gives access to list of keys + + Returns: + List[str]: Keys names for the attributes + """ + + def getncattr(self, key: str) -> any: + """Gives access to value for specific key + + Args: + key (str): Name of attribute to be retrieved + + Returns: + any: The requested attribute, of unknown type + """ + + +def read_netcdf_global_attrs(filepath: str) -> dict: + """Read the global attributes from a Netcdf file + + Args: + filepath (str): Path to Netcdf file + + Returns: + dict: Global attributes as dictionary + """ + return _read_attrs(Dataset(filepath)) + + +def read_netcdf(filepath: str, use_h5_lib = False) -> Tuple[dict, np.ndarray]: + """Reads DAS Netcdf file. + + Args: + filepath (str): Path to DAS Netcdf file + use_h5_lib: (bool): if True, python library h5netcdf will be used to do file I/O. + If False, netCDF4 library will be used. Default is False (netcdf4 will be used). 
+ + Returns: + Tuple[dict, np.ndarray]: Global attributes and data + """ + if use_h5_lib: + with h5nc.File(filepath, 'r') as nc_file: + grid = nc_file.variables['grid'][:] + return nc_file.attrs, grid + + # otherwise, use netcdf4 library (default) + with Dataset(filepath) as dataset: + dataset.set_auto_maskandscale(False) + grid = dataset.variables['grid'][:] + + global_attrs = _read_attrs(dataset) + return global_attrs, grid + + +def write_netcdf(attrs: dict, grid: np.ndarray, filepath: str, use_h5_lib = False) -> str: + """Store data and attributes to a Netcdf4 file + + Args: + attrs (dict): Attribute relative to the data to be written + grid (np.array): Numpy array of data + filepath (str): String representation of where to write the file + use_h5_lib: (bool): if True, python library h5netcdf will be used to do file I/O. + If False, netCDF4 library will be used. Default is False (netCDF4 will be used). + + Returns: + str: The location that data was written to + """ + _make_dirs(filepath) + logger.debug('Writing data to: %s', filepath) + + if use_h5_lib: + with h5nc.File(filepath, 'w') as file: + y_dimensions, x_dimensions = grid.shape + # set dimensions with a dictionary + file.dimensions = {'x': x_dimensions, 'y': y_dimensions} + + grid_var = file.create_variable('grid', ('y', 'x'), 'f4') + grid_var[:] = grid + + for key, value in attrs.items(): + file.attrs[key] = value + + else: + # otherwise, write file using netCDF4 library (default) + with Dataset(filepath, 'w', format='NETCDF4') as dataset: + y_dimensions, x_dimensions = grid.shape + dataset.createDimension('x', x_dimensions) + dataset.createDimension('y', y_dimensions) + + grid_var = dataset.createVariable('grid', 'f4', ('y', 'x')) + grid_var[:] = grid + + for key, value in attrs.items(): + setattr(dataset, key, str(value)) + + return filepath + + +def _make_dirs(filename: str): + dirname = os.path.dirname(os.path.abspath(filename)) + os.makedirs(dirname, exist_ok=True) + + +def _read_attrs(has_nc_attr: 
HasNcAttr) -> dict: + return {key: has_nc_attr.getncattr(key) for key in has_nc_attr.ncattrs()} diff --git a/python/idsse_common/test/resources/gridstore55657865.nc b/python/idsse_common/test/resources/gridstore55657865.nc new file mode 100644 index 00000000..c5d39fac Binary files /dev/null and b/python/idsse_common/test/resources/gridstore55657865.nc differ diff --git a/python/idsse_common/test/test_netcdf_io.py b/python/idsse_common/test/test_netcdf_io.py new file mode 100644 index 00000000..31a07a75 --- /dev/null +++ b/python/idsse_common/test/test_netcdf_io.py @@ -0,0 +1,117 @@ +"""Test suite for netcdf_io.py""" +# -------------------------------------------------------------------------------- +# Created on Mon May 1 2023 +# +# Copyright (c) 2023 Regents of the University of Colorado. All rights reserved. +# +# Contributors: +# Geary J Layne +# +# -------------------------------------------------------------------------------- +# pylint: disable=missing-function-docstring,redefined-outer-name,protected-access,unused-argument + +import os +from typing import Dict, Tuple + +from pytest import fixture, approx +from numpy import ndarray + +from idsse.common.netcdf_io import read_netcdf, read_netcdf_global_attrs, write_netcdf + + +# test data +EXAMPLE_NETCDF_FILEPATH = os.path.join( + os.path.dirname(__file__), + 'resources', + 'gridstore55657865.nc' +) + +EXAMPLE_ATTRIBUTES = { + 'product': 'NBM.AWS.GRIB', + 'field': 'TEMP', + 'valid_dt': '2022-11-11 17:00:00+00:00', + 'issue_dt': '2022-11-11 14:00:00+00:00', + 'task': 'data_task', + 'region': 'CO', + 'units': 'Fahrenheit', + 'proj_name': 'NBM', + 'proj_spec': '+proj=lcc +lat_0=25.0 +lon_0=-95.0 +lat_1=25.0 +r=6371200', + 'grid_spec': '+dx=2539.703 +dy=2539.703 +w=2345 +h=1597 +lat_ll=19.229 +lon_ll=-126.2766', + 'data_key': 'NBM.AWS.GRIB:CO:TEMP::Fahrenheit::20221111140000.20221111170000' +} + +EXAMPLE_PROD_KEY = ( + 'product:NBM.AWS.GRIB-field:TEMP-issue:20221111140000-valid:20221112000000-units:Fahrenheit' +) + 
+ +# pytest fixtures +@fixture +def example_netcdf_data() -> Tuple[Dict[str, any], ndarray]: + return read_netcdf(EXAMPLE_NETCDF_FILEPATH) + + +# tests +def test_read_netcdf_global_attrs(): + attrs = read_netcdf_global_attrs(EXAMPLE_NETCDF_FILEPATH) + + assert len(attrs) == 11 + assert attrs == EXAMPLE_ATTRIBUTES + + +def test_read_netcdf(example_netcdf_data: Tuple[Dict[str, any], ndarray]): + attrs, grid = example_netcdf_data + + assert grid.shape == (1597, 2345) + y_dimensions, x_dimensions = grid.shape + + assert grid[0][0] == approx(72.98599) + assert grid[round(y_dimensions / 2)][round(x_dimensions / 2)] == approx(12.505991) + assert grid[y_dimensions - 1][x_dimensions - 1] == approx(2.4259913) + + assert attrs == EXAMPLE_ATTRIBUTES + + +def test_read_and_write_netcdf(example_netcdf_data: Tuple[Dict[str, any], ndarray]): + # cleanup existing test file if needed + temp_netcdf_filepath = './tmp/test_netcdf_file.nc' + if os.path.exists(temp_netcdf_filepath): + os.remove(temp_netcdf_filepath) + + attrs, grid = example_netcdf_data + + # verify write_netcdf functionality + attrs['prodKey'] = EXAMPLE_PROD_KEY + attrs['prodSource'] = attrs['product'] + written_filepath = write_netcdf(attrs, grid, temp_netcdf_filepath) + assert written_filepath == temp_netcdf_filepath + assert os.path.exists(temp_netcdf_filepath) + + new_file_attrs, new_file_grid = read_netcdf(written_filepath) + assert new_file_attrs == attrs + assert new_file_grid[123][321] == grid[123][321] + + # cleanup created netcdf file + os.remove(temp_netcdf_filepath) + + +def test_read_and_write_netcdf_with_h5nc(example_netcdf_data: Tuple[Dict[str, any], ndarray]): + # create h5nc file + temp_netcdf_h5_filepath = './tmp/test_netcdf_h5_file.nc' + if os.path.exists(temp_netcdf_h5_filepath): + os.remove(temp_netcdf_h5_filepath) + + attrs, grid = example_netcdf_data + + # verify write_netcdf_with_h5nc functionality + attrs['prodKey'] = EXAMPLE_PROD_KEY + attrs['prodSource'] = attrs['product'] + written_filepath = 
write_netcdf(attrs, grid, temp_netcdf_h5_filepath, use_h5_lib=True) + assert written_filepath == temp_netcdf_h5_filepath + + # Don't verify h5 attrs for now; they are some custom h5py type and aren't easy to access + _, new_file_grid = read_netcdf(written_filepath, use_h5_lib=True) + assert new_file_grid[123][321] == grid[123][321] + + # cleanup created netcdf h5 file + os.remove(temp_netcdf_h5_filepath)