Skip to content

Commit

Permalink
Add KGE scores
Browse files Browse the repository at this point in the history
* KGE scores
Added KGE notebook to tutorial gallery, and adjusted capitalisation
---------

Signed-off-by: Tennessee Leeuwenburg <[email protected]>
Co-authored-by: shr015 <[email protected]>
Co-authored-by: Tennessee Leeuwenburg <[email protected]>
Co-authored-by: Tennessee Leeuwenburg <[email protected]>
Co-authored-by: Stephanie Chong <[email protected]>
Co-authored-by: Nicholas Loveday <[email protected]>
  • Loading branch information
6 people authored Sep 19, 2024
1 parent 8ff29ee commit e0d8585
Show file tree
Hide file tree
Showing 7 changed files with 666 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
.. autofunction:: scores.continuous.correlation.pearsonr
.. autofunction:: scores.continuous.multiplicative_bias
.. autofunction:: scores.continuous.pbias
.. autofunction:: scores.continuous.kge
.. autofunction:: scores.continuous.isotonic_fit
.. autofunction:: scores.continuous.consistent_expectile_score
.. autofunction:: scores.continuous.consistent_quantile_score
Expand Down
6 changes: 5 additions & 1 deletion docs/included.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@
* - Isotonic Regression (Isotonic Fit, Reliability Diagram)
- [API](api.md#scores.continuous.isotonic_fit)
- [Tutorial](project:./tutorials/Isotonic_Regression_And_Reliability_Diagrams.md)
- [de Leeuw et al. (2009)](https://doi.org/10.18637/jss.v032.i05); [Dimitriadis et al. (2020)](https://doi.org/10.1073/pnas.2016191118); [Jordan et al. (2020), version 2](https://doi.org/10.48550/arXiv.1904.04761)
- [de Leeuw et al. (2009)](https://doi.org/10.18637/jss.v032.i05); [Dimitriadis et al. (2020)](https://doi.org/10.1073/pnas.2016191118); [Jordan et al. (2020), version 2](https://doi.org/10.48550/arXiv.1904.04761)
* - Kling–Gupta efficiency (KGE)
- [API](api.md#scores.continuous.kge)
- [Tutorial](project:./tutorials/Kling–Gupta_efficiency.md)
- [Gupta et al. (2009)](https://doi.org/10.1016/j.jhydrol.2009.08.003); [Knoben et al. (2019)](https://doi.org/10.5194/hess-23-4323-2019)
* - Mean Absolute Error (MAE)
- [API](api.md#scores.continuous.mae)
- [Tutorial](project:./tutorials/Mean_Absolute_Error.md)
Expand Down
2 changes: 2 additions & 0 deletions src/scores/continuous/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from scores.continuous.quantile_loss_impl import quantile_score
from scores.continuous.standard_impl import (
additive_bias,
kge,
mae,
mean_error,
mse,
Expand Down Expand Up @@ -46,6 +47,7 @@
"mean_error",
"multiplicative_bias",
"pbias",
"kge",
"isotonic_fit",
"consistent_expectile_score",
"consistent_huber_score",
Expand Down
147 changes: 146 additions & 1 deletion src/scores/continuous/standard_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
This module contains standard methods which may be used for continuous scoring
"""

from typing import Optional
from typing import Optional, Union

import numpy as np
import xarray as xr

import scores.functions
Expand Down Expand Up @@ -418,3 +419,147 @@ def pbias(

_pbias = 100 * error.mean(dim=reduce_dims) / obs.mean(dim=reduce_dims)
return _pbias


def kge(
fcst: xr.DataArray,
obs: xr.DataArray,
*,
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
scaling_factors: Optional[Union[list[float], np.ndarray]] = None,
return_components: bool = False,
) -> XarrayLike:
# pylint: disable=too-many-locals
"""
Calculate the Kling-Gupta Efficiency (KGE) between observed and simulated (or forecast) values.
KGE is a performance metric that decomposes the error into three components:
correlation, variability, and bias.
It is computed as:
.. math::
\\text{KGE} = 1 - \\sqrt{\\left[s_\\rho \\cdot (\\rho - 1)\\right]^2 +
\\left[s_\\alpha \\cdot (\\alpha - 1)\\right]^2 + \\left[s_\\beta \\cdot (\\beta - 1)\\right]^2}
.. math::
\\rho = \\frac{\\text{cov}(x, y)}{\\sigma_x \\sigma_y}
.. math::
\\alpha = \\frac{\\sigma_x}{\\sigma_y}
.. math::
\\beta = \\frac{\\mu_x}{\\mu_y}
where:
- :math:`\\rho` = Pearson's correlation coefficient between observed and forecast values
- :math:`\\alpha` is the ratio of the standard deviations (variability ratio)
- :math:`\\beta` is the ratio of the means (bias)
- :math:`x` and :math:'y' are forecast and observed values, respectively
- :math:`\\mu_x` and :math:`\\mu_y` are the means of forecast and observed values, respectively
- :math:`\\sigma_x` and :math:`\\sigma_y` are the standard deviations of forecast and observed values, respectively
- :math:`s_\\rho`, :math:`s_\\alpha` and :math:`s_\\beta` are the scaling factors for the correlation coefficient :math:`\\rho`,
the variability term :math:`\\alpha` and the bias term :math:`\\beta`
Args:
fcst: Forecast or predicted variables.
obs: Observed variables.
reduce_dims: Optionally specify which dimensions to reduce when
calculating the KGE. All other dimensions will be preserved.
preserve_dims: Optionally specify which dimensions to preserve when
calculating the KGE. All other dimensions will be reduced. As a
special case, 'all' will allow all dimensions to be preserved. In
this case, the result will be in the same shape/dimensionality
as the forecast, and the errors will be the error at each
point (i.e. single-value comparison against observed), and the
forecast and observed dimensions must match precisely.
scaling_factors : A 3-element vector or list describing the weights for each term in the KGE.
Defined by: scaling_factors = [s_rho, s_alpha, s_beta] to apply to the correlation (rho),
variability (alpha), and bias (beta) terms respectively. Defaults to (1.0, 1.0, 1.0). (*See
equation 10 in Gupta et al. (2009) for definitions of s_rho, s_alpha and s_beta*)
return_components (default False): If True, the function also returns the individual terms contributing to the KGE score:
- `rho`: Pearson correlation coefficient between forecasts and observations.
- `alpha`: Variability ratio (forecast/observation standard deviation).
- `beta`: Bias ratio between forecast and observation mean.
Returns:
The Kling-Gupta Efficiency (KGE) score as an xarray DataArray.
If `return_components` is True, the function returns `xarray.Dataset` kge_s with the following variables:
- `kge`: The KGE score.
- `rho`: The Pearson correlation coefficient.
- `alpha`: The variability ratio.
- `beta`: The bias term.
Notes:
- Stats are calculated only from values for which both observations and
simulations are not null values.
- This function isn't set up to take weights.
- Currently this function is working only on xarray data array
- preserve_dims: 'all' returns NaN like pearson correlation as std dev is zero for a single point
References:
- Gupta, H. V., Kling, H., Yilmaz, K. K., & Martinez, G. F. (2009). Decomposition of the mean squared error and
NSE performance criteria: Implications for improving hydrological modeling. Journal of Hydrology, 377(1-2), 80-91.
https://doi.org/10.1016/j.jhydrol.2009.08.003.
- Knoben, W. J. M., Freer, J. E., & Woods, R. A. (2019). Technical note: Inherent benchmark or not?
Comparing Nash-Sutcliffe and Kling-Gupta efficiency scores. Hydrology and Earth System Sciences, 23(10), 4323-4331.
https://doi.org/10.5194/hess-23-4323-2019.
Examples:
>>> kge_s = kge(forecasts, obs,preserve_dims='lat') # if data is of dimension {lat,time}, kge value is computed across the time dimension
>>> kge_s = kge(forecasts, obs,reduce_dims="time") # if data is of dimension {lat,time}, reduce_dims="time" is same as preserve_dims='lat'
>>> kge_s = kge(forecasts, obs, return_components=True) # kge_s is dataset of all three components and kge value itself
>>> kge_s_weighted = kge(forecasts, obs, scaling_factors=(0.5, 1.0, 2.0))
"""

# Type checks as xrray.corr can only handle xr.DataArray
if not isinstance(fcst, xr.DataArray):
raise TypeError("kge: fcst must be an xarray.DataArray")
if not isinstance(obs, xr.DataArray):
raise TypeError("kge: obs must be an xarray.DataArray")
if scaling_factors is not None and not isinstance(scaling_factors, (list, np.ndarray)):
raise TypeError("kge: scaling_factors must be a list of floats or a numpy array")

if scaling_factors is None:
scaling_factors = [1.0, 1.0, 1.0]
s_rho, s_alpha, s_beta = scaling_factors
reduce_dims = scores.utils.gather_dimensions(
fcst.dims, obs.dims, reduce_dims=reduce_dims, preserve_dims=preserve_dims
)
# Need to broadcast and match NaNs so that the fcst and obs are for the
# same points
fcst, obs = broadcast_and_match_nan(fcst, obs)
# compute linear correlation coefficient r between fcst and obs
rho = xr.corr(fcst, obs, reduce_dims) # type: ignore

# compute alpha (sigma_sim / sigma_obs)
sigma_fcst = fcst.std(reduce_dims)
sigma_obs = obs.std(reduce_dims)
alpha = sigma_fcst / sigma_obs

# compute beta (mu_sim / mu_obs)
mu_fcst = fcst.mean(reduce_dims)
mu_obs = obs.mean(reduce_dims)
beta = mu_fcst / mu_obs

# compute Euclidian distance from the ideal point in the scaled space
ed_s = np.sqrt((s_rho * (rho - 1)) ** 2 + (s_alpha * (alpha - 1)) ** 2 + (s_beta * (beta - 1)) ** 2)
kge_s = 1 - ed_s
if return_components:
# Create dataset of all components
kge_s = xr.Dataset(
{
"kge": kge_s,
"rho": rho,
"alpha": alpha,
"beta": beta,
}
)
return kge_s # type: ignore
119 changes: 119 additions & 0 deletions tests/continuous/test_standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,80 @@ def test_rmse_angular():

EXP_DS_PBIAS1 = xr.Dataset({"a": EXP_PBIAS1, "b": EXP_PBIAS2})

## for KGE
DA1_KGE = xr.DataArray(
np.array([[1, 2, 3], [0, 1, 0], [0.5, -0.5, 0.5], [3, 6, 3]]),
dims=("space", "time"),
coords=[
("space", ["w", "x", "y", "z"]),
("time", [1, 2, 3]),
],
)

DA2_KGE = xr.DataArray(
np.array([[2, 4, 6], [6, 5, 6], [3, 4, 5], [3, np.nan, 3]]),
dims=("space", "time"),
coords=[
("space", ["w", "x", "y", "z"]),
("time", [1, 2, 3]),
],
)

DA3_KGE = xr.DataArray(
np.array([[1, 2, 3], [3, 2.5, 3], [1.5, 2, 2.5], [1.5, np.nan, 1.5]]),
dims=("space", "time"),
coords=[
("space", ["w", "x", "y", "z"]),
("time", [1, 2, 3]),
],
)
DA4_KGE = xr.DataArray(
np.array([[1, 3, 7], [2, 2, 8], [3, 1, 7]]),
dims=("space", "time"),
coords=[
("space", ["x", "y", "z"]),
("time", [1, 2, 3]),
],
)
DA5_KGE = xr.DataArray(
np.array([1, 2, 3]),
dims=("space"),
coords=[("space", ["x", "y", "z"])],
)

## Expected KGE values
EXP_KGE_KEEP_SPACE_DIM = xr.DataArray(
np.array([0.2928932188134524, -1.2103875562418747, -0.44811448882050064, np.nan]),
dims=("space"),
coords=[("space", ["w", "x", "y", "z"])],
)
EXP_KGE_REDUCE_ALL = xr.DataArray(0.2928932188134524)

EXP_KGE_rho_returns_components = xr.DataArray(1.0)
EXP_KGE_alpha_returns_components = xr.DataArray(0.5)
EXP_KGE_beta_returns_components = xr.DataArray(0.5)

EXP_KGE_returns_components = xr.Dataset(
{
"kge": EXP_KGE_REDUCE_ALL,
"rho": EXP_KGE_rho_returns_components,
"alpha": EXP_KGE_alpha_returns_components,
"beta": EXP_KGE_beta_returns_components,
}
)


EXP_KGE_Scaling_Factors = xr.DataArray(
1 - np.sqrt((0.5 * (1 - 1)) ** 2 + (1.0 * (0.5 - 1)) ** 2 + (2 * (0.5 - 1)) ** 2)
)


EXP_KGE_DIFF_SIZE = xr.DataArray(
np.array([1.0, -1.0, -1.8791915368841288]),
dims=("time"),
coords=[("time", [1, 2, 3])],
)


@pytest.mark.parametrize(
("fcst", "obs", "reduce_dims", "preserve_dims", "weights", "expected"),
Expand Down Expand Up @@ -813,3 +887,48 @@ def test_pbias_dask():
result = result.compute()
assert isinstance(result.data, np.ndarray)
xr.testing.assert_equal(result, EXP_PBIAS3)


@pytest.mark.parametrize(
("fcst", "obs", "reduce_dims", "preserve_dims", "return_components", "scaling_factors", "expected"),
[
# Check reduce dim arg
(DA1_KGE, DA2_KGE, None, "space", False, None, EXP_KGE_KEEP_SPACE_DIM),
# Check preserve dim arg
(DA1_KGE, DA2_KGE, "time", None, False, None, EXP_KGE_KEEP_SPACE_DIM),
# Check reduce all
(DA3_KGE, DA2_KGE, None, None, False, None, EXP_KGE_REDUCE_ALL),
# returning components
(DA3_KGE, DA2_KGE, None, None, True, None, EXP_KGE_returns_components),
# Check scaling_factors
(DA3_KGE, DA2_KGE, None, None, False, [0.5, 1.0, 2.0], EXP_KGE_Scaling_Factors),
# Check different size arrays as input
(DA4_KGE, DA5_KGE, "space", None, False, None, EXP_KGE_DIFF_SIZE),
],
)
def test_kge(fcst, obs, reduce_dims, preserve_dims, return_components, scaling_factors, expected):
"""
Tests continuous.kge
"""
result = scores.continuous.kge(
fcst,
obs,
reduce_dims=reduce_dims,
preserve_dims=preserve_dims,
return_components=return_components,
scaling_factors=scaling_factors,
)
xr.testing.assert_allclose(result, expected, rtol=1e-10, atol=1e-10)


def test_kge_dask():
"""
Tests that continuous.kge works with Dask
"""
fcst = DA3_KGE.chunk()
obs = DA2_KGE.chunk()
result = scores.continuous.kge(fcst, obs)
assert isinstance(result.data, dask.array.Array) # type: ignore
result = result.compute() # type: ignore
assert isinstance(result.data, (np.ndarray, np.generic))
xr.testing.assert_equal(result, EXP_KGE_REDUCE_ALL)
Loading

0 comments on commit e0d8585

Please sign in to comment.