added percent bias score (pbias) #639

Merged · 8 commits · Sep 5, 2024
1 change: 1 addition & 0 deletions docs/api.md
@@ -22,6 +22,7 @@
.. autofunction:: scores.continuous.flip_flop_index_proportion_exceeding
.. autofunction:: scores.continuous.correlation.pearsonr
.. autofunction:: scores.continuous.multiplicative_bias
.. autofunction:: scores.continuous.pbias
.. autofunction:: scores.continuous.isotonic_fit
.. autofunction:: scores.continuous.consistent_expectile_score
.. autofunction:: scores.continuous.consistent_quantile_score
4 changes: 4 additions & 0 deletions docs/included.md
@@ -97,6 +97,10 @@
- [API](api.md#scores.continuous.correlation.pearsonr)
- [Tutorial](project:./tutorials/Pearsons_Correlation.md)
- [Wikipedia](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient)
* - Percent Bias
- [API](api.md#scores.continuous.pbias)
- [Tutorial](project:./tutorials/Additive_and_multiplicative_bias.md)
- [Percent Bias (CRAN hydroGOF)](https://search.r-project.org/CRAN/refmans/hydroGOF/html/pbias.html); [Sorooshian et al. (1993)](https://doi.org/10.1029/92WR02617); [Alfieri et al. (2014)](http://dx.doi.org/10.1016/j.jhydrol.2014.06.035); [Dawson et al. (2007)](https://doi.org/10.1016/j.envsoft.2006.06.008); [Moriasi et al. (2007)](https://doi.org/10.13031/2013.23153)
* - Pinball Loss, *see Quantile Loss*
- —
- —
2 changes: 2 additions & 0 deletions src/scores/continuous/__init__.py
@@ -20,6 +20,7 @@
mean_error,
mse,
multiplicative_bias,
pbias,
rmse,
)
from scores.continuous.threshold_weighted_impl import (
@@ -44,6 +45,7 @@
"additive_bias",
"mean_error",
"multiplicative_bias",
"pbias",
"isotonic_fit",
"consistent_expectile_score",
"consistent_huber_score",
78 changes: 78 additions & 0 deletions src/scores/continuous/standard_impl.py
@@ -341,3 +341,81 @@ def multiplicative_bias(
fcst, obs = broadcast_and_match_nan(fcst, obs)
multi_bias = fcst.mean(dim=reduce_dims) / obs.mean(dim=reduce_dims)
return multi_bias


def pbias(
fcst: XarrayLike,
obs: XarrayLike,
*,
reduce_dims: Optional[FlexibleDimensionTypes] = None,
preserve_dims: Optional[FlexibleDimensionTypes] = None,
weights: Optional[XarrayLike] = None,
) -> XarrayLike:
"""
Calculates the percent bias, the ratio of the additive bias to the mean observed value, multiplied by 100.

Percent bias is used for evaluating and comparing forecast accuracy across stations or datasets with varying magnitudes.
By expressing the error as a percentage of the observed value, it allows standardised comparisons of forecast
performance regardless of the absolute scale of the values. Like `multiplicative_bias`, `pbias` returns `np.inf`
where the mean of `obs` across the dimensions being reduced is 0.

It is defined as

.. math::
\\text{Percent bias} = 100 * \\frac{\\sum_{i=1}^{N}(x_i - y_i)}{\\sum_{i=1}^{N} y_i}

where:
- :math:`x_i` = the values of x in a sample (i.e. forecast values)
- :math:`y_i` = the values of y in a sample (i.e. observed values)

See the "pbias" section at https://search.r-project.org/CRAN/refmans/hydroGOF/html/pbias.html for more information.

Args:
fcst: Forecast or predicted variables.
obs: Observed variables.
reduce_dims: Optionally specify which dimensions to reduce when
calculating the percent bias. All other dimensions will be preserved.
preserve_dims: Optionally specify which dimensions to preserve when
calculating the percent bias. All other dimensions will be reduced. As a
special case, 'all' will allow all dimensions to be preserved. In
this case, the result will be in the same shape/dimensionality
as the forecast, and the errors will be the error at each
point (i.e. single-value comparison against observed), and the
forecast and observed dimensions must match precisely.
weights: Optionally provide an array for weighted averaging (e.g. by area, by latitude,
by population, custom)

Returns:
An xarray object with the percent bias of a forecast.

References:
- Sorooshian, S., Duan, Q., & Gupta, V. K. (1993). Calibration of rainfall-runoff models:
Application of global optimization to the Sacramento Soil Moisture Accounting Model.
Water Resources Research, 29(4), 1185-1194. https://doi.org/10.1029/92WR02617
- Alfieri, L., Pappenberger, F., Wetterhall, F., Haiden, T., Richardson, D., & Salamon, P. (2014).
Evaluation of ensemble streamflow predictions in Europe. Journal of Hydrology, 517, 913-922.
http://dx.doi.org/10.1016/j.jhydrol.2014.06.035
- Dawson, C. W., Abrahart, R. J., & See, L. M. (2007). HydroTest:
A web-based toolbox of evaluation metrics for the standardised assessment of hydrological forecasts.
Environmental Modelling and Software, 22(7), 1034-1052.
https://doi.org/10.1016/j.envsoft.2006.06.008
- Moriasi, D. N., Arnold, J. G., Van Liew, M. W., Bingner, R. L., Harmel, R. D., & Veith, T. L. (2007).
Model evaluation guidelines for systematic quantification of accuracy in watershed simulations.
Transactions of the ASABE, 50(3), 885-900. https://doi.org/10.13031/2013.23153

"""
reduce_dims = scores.utils.gather_dimensions(
fcst.dims, obs.dims, reduce_dims=reduce_dims, preserve_dims=preserve_dims
)
fcst = scores.functions.apply_weights(fcst, weights=weights)
obs = scores.functions.apply_weights(obs, weights=weights)

# Need to broadcast and match NaNs so that the mean error and obs mean are for the
# same points
fcst, obs = broadcast_and_match_nan(fcst, obs)
error = fcst - obs

_pbias = 100 * error.mean(dim=reduce_dims) / obs.mean(dim=reduce_dims)
return _pbias
68 changes: 68 additions & 0 deletions tests/continuous/test_standard.py
@@ -661,6 +661,34 @@ def test_rmse_angular():
EXP_DS_BIAS1 = xr.Dataset({"a": EXP_BIAS1, "b": -EXP_BIAS1})
EXP_DS_BIAS2 = xr.Dataset({"a": EXP_BIAS4, "b": EXP_BIAS5})

## for pbias
EXP_PBIAS1 = xr.DataArray(
np.array([-50, -100.0, (0.5 / 3 + 0.5 / 3) / (-0.5 / 3) * 100]),
dims=("space"),
coords=[
("space", ["w", "x", "y"]),
],
)
EXP_PBIAS2 = xr.DataArray(
np.array([100.0, np.inf, (0.5 / 3 + 0.5 / 3) / (-0.5 / 3) * 100]),
dims=("space"),
coords=[
("space", ["w", "x", "y"]),
],
)

EXP_PBIAS3 = xr.DataArray(
np.array([-50.0, -100.0, -75.0]),
dims=("space"),
coords=[
("space", ["w", "x", "y"]),
],
)

EXP_PBIAS4 = xr.DataArray(np.array(-13 / 15.5 * 100))

EXP_DS_PBIAS1 = xr.Dataset({"a": EXP_PBIAS1, "b": EXP_PBIAS2})


@pytest.mark.parametrize(
("fcst", "obs", "reduce_dims", "preserve_dims", "weights", "expected"),
@@ -745,3 +773,43 @@ def test_multiplicative_bias_dask():
result = result.compute()
assert isinstance(result.data, np.ndarray)
xr.testing.assert_equal(result, EXP_BIAS6)


@pytest.mark.parametrize(
("fcst", "obs", "reduce_dims", "preserve_dims", "weights", "expected"),
[
# Check preserve_dims arg
(DA1_BIAS, DA2_BIAS, None, "space", None, EXP_PBIAS1),
# Check divide by zero returns a np.inf
(DA2_BIAS, DA1_BIAS, None, "space", None, EXP_PBIAS2),
# Check weighting works
(DA1_BIAS, DA3_BIAS, None, "space", BIAS_WEIGHTS, EXP_PBIAS3),
# Check reduce_dims arg
(DA1_BIAS, DA2_BIAS, "time", None, None, EXP_PBIAS1),
# Reduce all
(DA1_BIAS, DA2_BIAS, None, None, None, EXP_PBIAS4),
# Test with Dataset
(DS_BIAS1, DS_BIAS2, None, "space", None, EXP_DS_PBIAS1),
],
)
def test_pbias(fcst, obs, reduce_dims, preserve_dims, weights, expected):
"""
Tests continuous.pbias
"""
result = scores.continuous.pbias(fcst, obs, reduce_dims=reduce_dims, preserve_dims=preserve_dims, weights=weights)
xr.testing.assert_allclose(result, expected, rtol=1e-10, atol=1e-10)


def test_pbias_dask():
"""
Tests that continuous.pbias works with Dask
"""
fcst = DA1_BIAS.chunk()
obs = DA3_BIAS.chunk()
weights = BIAS_WEIGHTS.chunk()
result = scores.continuous.pbias(fcst, obs, preserve_dims="space", weights=weights)
assert isinstance(result.data, dask.array.Array)
result = result.compute()
assert isinstance(result.data, np.ndarray)
xr.testing.assert_equal(result, EXP_PBIAS3)
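As a side note on the two formulations in the PR: the docstring's math uses a ratio of sums while the implementation uses a ratio of means. These agree whenever the forecast and observations cover the same points, because the 1/N factors cancel. An illustrative check (not part of the PR's test suite; the random values are arbitrary):

```python
import numpy as np

rng = np.random.default_rng(42)
fcst = rng.normal(10.0, 2.0, size=50)
obs = rng.normal(9.0, 2.0, size=50)

# Implementation form: ratio of means.
mean_form = 100 * (fcst - obs).mean() / obs.mean()
# Docstring form: ratio of sums -- the 1/N factors cancel.
sum_form = 100 * (fcst - obs).sum() / obs.sum()

assert np.isclose(mean_form, sum_form)
```

This is also why the PR's NaN-matching via `broadcast_and_match_nan` matters: if `fcst` and `obs` had NaNs at different points, the two means would be taken over different samples and the cancellation would no longer hold.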
76 changes: 66 additions & 10 deletions tutorials/Additive_and_multiplicative_bias.ipynb

Large diffs are not rendered by default.
