123 crps for ensembles (#126)

* FEAT: crps_for_ensemble * Extend tmp_coord_name to allow for multiple names to be generated --------- Co-authored-by: Tennessee Leeuwenburg <[email protected]>
nci · Feb 14, 2024 · dfe8f5b · dfe8f5b
1 parent 1239659
commit dfe8f5b
Show file tree

Hide file tree

Showing 9 changed files with 504 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ Currently Included Metrics:
 
 | continuous                      | probability | categorical      | statistical tests |
 | ----------                      | ----------- | -----------      | ----------------- |
-| MAE, MSE, RMSE, Flip Flop Index, Quantile Score | CRPS, ROC   | FIRM, POD, POFD  |  Diebold Mariano (with the Harvey et al. 1997 and the Hering and Genton 2011 modifications) |
+| MAE, MSE, RMSE, Flip Flop Index, Quantile Score | CRPS for CDF, CRPS for ensemble, ROC   | FIRM, POD, POFD  |  Diebold Mariano (with the Harvey et al. 1997 and the Hering and Genton 2011 modifications) |
 
 
 **Notice -- This repository is currently undergoing initial construction and maintenance. It is not yet recommended for use. This notice will be removed after the first feature release. In the meantime, please feel free to look around, and don't hesitate to get in touch with any questions (see the contributing guide for how).**

diff --git a/docs/api.md b/docs/api.md
@@ -26,6 +26,7 @@
 .. autofunction:: scores.probability.crps_cdf
 .. autofunction:: scores.probability.adjust_fcst_for_crps
 .. autofunction:: scores.probability.crps_cdf_brier_decomposition
+.. autofunction:: scores.probability.crps_for_ensemble
 .. autofunction:: scores.probability.murphy_score
 .. autofunction:: scores.probability.murphy_thetas
 .. autofunction:: scores.probability.roc_curve_data

diff --git a/docs/summary_table_of_scores.md b/docs/summary_table_of_scores.md
@@ -1,3 +1,3 @@
 | continuous                   | probability                        | categorical                      | statistical tests      |
 | ----------                   | -----------                        | -----------                      | -----------            |
-| MAE, MSE, RMSE, Murphy score, Quantile Score | CRPS_CDF, Murphy score, ROC        | FIRM, POD, POFD, Flip Flop Index | Diebold Mariano (with the Harvey et al. 1997 and the Hering and Genton 2011 modifications)|
+| MAE, MSE, RMSE, Murphy score, Quantile Score | CRPS for CDF, CRPS for ensemble, Murphy score, ROC        | FIRM, POD, POFD, Flip Flop Index | Diebold Mariano (with the Harvey et al. 1997 and the Hering and Genton 2011 modifications)|
diff --git a/src/scores/probability/__init__.py b/src/scores/probability/__init__.py
@@ -7,5 +7,6 @@
     adjust_fcst_for_crps,
     crps_cdf,
     crps_cdf_brier_decomposition,
+    crps_for_ensemble,
 )
 from scores.probability.roc_impl import roc_curve_data
diff --git a/src/scores/probability/crps_impl.py b/src/scores/probability/crps_impl.py
@@ -1,9 +1,10 @@
 """
 This module supports the implementation of the CRPS scoring function, drawing from additional functions.
-The primary method, `crps_cdf` is imported into the probability module to be part of the probability API
+The two primary methods, `crps_cdf` and `crps_for_ensemble` are imported into 
+the probability module to be part of the probability API.
 """
 from collections.abc import Iterable
-from typing import Literal, Optional
+from typing import Literal, Optional, Sequence
 
 import numpy as np
 import pandas as pd
@@ -156,7 +157,7 @@ def crps_cdf(
     weights=None,
     include_components=False,
 ):
-    """Calculates CRPS CDF probabilistic metric.
+    """Calculates the CRPS probabilistic metric given CDF input.
 
     Calculates the continuous ranked probability score (CRPS), or the mean CRPS over
     specified dimensions, given forecasts in the form of predictive cumulative
@@ -280,7 +281,8 @@ def crps_cdf(
         ValueError: if `threshold_weight` has negative values.
 
     See also:
-        `scores.probability.crps_cdf_brier_decomposition`
+        - `scores.probability.crps_cdf_brier_decomposition`
+        - `scores.probability.crps_for_ensemble`
 
     References:
         - Matheson, J. E., and R. L. Winkler, 1976: Scoring rules for continuous probability distributions.
@@ -735,3 +737,91 @@ def crps_step_threshold_weight(
         weight = 1 - weight
 
     return weight
+
+
+def crps_for_ensemble(
+    fcst: xr.DataArray,
+    obs: xr.DataArray,
+    ensemble_member_dim: str,
+    method: Literal["ecdf", "fair"] = "ecdf",
+    reduce_dims: Optional[Sequence[str]] = None,
+    preserve_dims: Optional[Sequence[str]] = None,
+    weights: xr.DataArray = None,
+) -> xr.DataArray:
+    """Calculates the CRPS probabilistic metric given ensemble input.
+
+    Calculates the continuous ranked probability score (CRPS) given an ensemble of forecasts.
+    An ensemble of forecasts can also be thought of as a random sample from the predictive
+    distribution.
+
+    Given an observation y, and ensemble member values {x_i} (for 1 <= i <= M), the CRPS is
+    calculated by the formula
+        CRPS({x_i}, y) = (1 / M) * sum(|x_i - y|) - (1 / 2 * K) * sum(|x_i - x_j|),
+    where the first sum is iterated over 1 <= i <= M and the second sum is iterated over
+    1 <= i <= M and 1 <= j <= M.
+
+    The value of the constant K in this formula depends on the method.
+        - If `method="ecdf"` then K = M ** 2. In this case the CRPS value returned is
+            the exact CRPS value for the emprical cumulation distribution function
+            constructed using the ensemble values.
+        - If `method="fair"` then K = M * (M - 1). In this case the CRPS value returned
+            is the approximated CRPS where the ensemble values can be interpreted as a
+            random sample from the underlying predictive distribution. This interpretation
+            stems from the formula CRPS(F, Y) = E|X - Y| - E|X - X'|/2, where X and X'
+            are independent samples of the predictive distribution F, Y is the observation
+            (possibly unknown) and E denotes the expectation. This choice of K gives an
+            unbiased estimate for the second expectation.
+
+    Args:
+        fcst: Forecast data. Must have a dimension `ensemble_member_dim`.
+        obs: Observation data.
+        ensemble_member_dim: the dimension that specifies the ensemble member or the sample
+            from the predictive distribution.
+        method: Either "ecdf" or "fair".
+        reduce_dims: Dimensions to reduce. Can be "all" to reduce all dimensions.
+        preserve_dims: Dimensions to preserve. Can be "all" to preserve all dimensions.
+        weights: Weights for calculating a weighted mean of individual scores.
+
+    Returns:
+        xarray object of (weighted mean) CRPS values.
+
+    Raises:
+        ValueError: when method is not one of "ecdf" or "fair".
+
+    See also:
+        `scores.probability.crps_cdf`
+
+    References:
+        - C. Ferro (2014), "Fair scores for ensemble forecasts", Q J R Meteorol Soc
+            140(683):1917-1923.
+        - T. Gneiting T and A. Raftery (2007), "Strictly proper scoring rules, prediction,
+            and estimation", J Am Stat Assoc, 102(477):359-37.
+        - M. Zamo and P. Naveau (2018), "Estimation of the Continuous Ranked Probability
+            Score with Limited Information and Applications to Ensemble Weather Forecasts",
+            Math Geosci 50:209-234, https://doi.org/10.1007/s11004-017-9709-7
+    """
+    if method not in ["ecdf", "fair"]:
+        raise ValueError("`method` must be one of 'ecdf' or 'fair'")
+
+    dims_for_mean = scores.utils.gather_dimensions2(fcst, obs, weights, reduce_dims, preserve_dims, ensemble_member_dim)
+
+    ensemble_member_dim1 = scores.utils.tmp_coord_name(fcst)
+
+    # calculate forecast spread contribution
+    fcst_copy = fcst.rename({ensemble_member_dim: ensemble_member_dim1})
+
+    fcst_spread_term = np.abs(fcst - fcst_copy).sum([ensemble_member_dim, ensemble_member_dim1])
+    ens_count = fcst.count(ensemble_member_dim)
+    if method == "ecdf":
+        fcst_spread_term = fcst_spread_term / (2 * ens_count**2)
+    if method == "fair":
+        fcst_spread_term = fcst_spread_term / (2 * ens_count * (ens_count - 1))
+
+    # calculate final CRPS for each forecast case
+    fcst_obs_term = np.abs(fcst - obs).mean(ensemble_member_dim)
+    result = fcst_obs_term - fcst_spread_term
+
+    # apply weights and take means across specified dims
+    result = scores.functions.apply_weights(result, weights).mean(dim=dims_for_mean)
+
+    return result
diff --git a/src/scores/utils.py b/src/scores/utils.py
@@ -23,11 +23,23 @@
 It is ambiguous how to proceed therefore an exception has been raised instead.
 """
 
+ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION2 = """
+You are requesting to preserve a dimension which does not appear in your data 
+(fcst, obs or weights). It is ambiguous how to proceed therefore an exception has been
+raised instead.
+"""
+
 ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION = """
 You are requesting to reduce a dimension which does not appear in your data (fcst or obs).
 It is ambiguous how to proceed therefore an exception has been raised instead.
 """
 
+ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION2 = """
+You are requesting to reduce a dimension which does not appear in your data
+(fcst, obs or weights). It is ambiguous how to proceed therefore an exception has been 
+raised instead.
+"""
+
 ERROR_OVERSPECIFIED_PRESERVE_REDUCE = """
 You have specified both preserve_dims and reduce_dims. This method doesn't know how
 to properly interpret that, therefore an exception has been raised.
@@ -48,7 +60,13 @@ def gather_dimensions(  # pylint: disable=too-many-branches
     preserve_dims: FlexibleDimensionTypes = None,
 ) -> set[Hashable]:
     """
-    Establish which dimensions to reduce when calculating errors but before taking means
+    Establish which dimensions to reduce when calculating errors but before taking means.
+
+    Note: `scores.utils.gather_dimensions` and `scores.utils.gather_dimensions2` will be
+    integrated at some point in the future. `scores.utils.gather_dimensions2` offers
+    more comprehensive and less restrictive dimension checking and should be preferred in
+    the meantime. See `scores.probability.crps_cdf` for an example of
+    `scores.utils.gather_dimensions2` usage.
 
     Args:
         fcst_dims: Forecast dimensions inputs
@@ -60,6 +78,9 @@ def gather_dimensions(  # pylint: disable=too-many-branches
         Dimensions based on optional args.
     Raises:
         ValueError: When `preserve_dims and `reduce_dims` are both specified.
+
+    See also:
+        `scores.utils.gather_dimensions2`
     """
 
     all_dims = set(fcst_dims).union(set(obs_dims))
@@ -112,6 +133,104 @@ def gather_dimensions(  # pylint: disable=too-many-branches
     return reduce_dims
 
 
+def gather_dimensions2(
+    fcst: xr.DataArray,
+    obs: xr.DataArray,
+    weights: xr.DataArray = None,
+    reduce_dims: FlexibleDimensionTypes = None,
+    preserve_dims: FlexibleDimensionTypes = None,
+    special_fcst_dims: FlexibleDimensionTypes = None,
+) -> set[Hashable]:
+    """
+    Performs standard dimensions checks for inputs of functions that calculate (mean) scores.
+    Returns a set of the dimensions to reduce.
+
+    Note: `scores.utils.gather_dimensions` and `scores.utils.gather_dimensions2` will be
+    integrated at some point in the future. `scores.utils.gather_dimensions2` offers
+    more comprehensive and less restrictive dimension checking and should be preferred in
+    the meantime. See `scores.probability.crps_cdf` for an example of
+    `scores.utils.gather_dimensions2` usage.
+
+    Args:
+        fcst: Forecast data
+        obs: Observation data
+        weights: Weights for calculating a weighted mean of scores
+        reduce_dims: Dimensions to reduce. Can be "all" to reduce all dimensions.
+        preserve_dims: Dimensions to preserve. Can be "all" to preserve all dimensions.
+        special_fcst_dims: Dimension(s) in `fcst` that are reduced to calculate individual scores.
+            Must not appear as a dimension in `obs`, `weights`, `reduce_dims` or `preserve_dims`.
+            e.g. the ensemble member dimension if calculating CRPS for ensembles, or the
+            threshold dimension of calculating CRPS for CDFs.
+
+    Returns:
+        Set of dimensions over which to take the mean once the checks are passed.
+
+    Raises:
+        ValueError: when `preserve_dims and `reduce_dims` are both specified.
+        ValueError: when `special_fcst_dims` is not a subset of `fcst.dims`.
+        ValueError: when `obs.dims`, `weights.dims`, `reduce_dims` or `preserve_dims`
+            contains elements from `special_fcst_dims`.
+        ValueError: when `preserve_dims and `reduce_dims` contain elements not among dimensions
+            of the data (`fcst`, `obs` or `weights`).
+
+    See also:
+        `scores.utils.gather_dimensions`
+    """
+    all_data_dims = set(fcst.dims).union(set(obs.dims))
+    if weights is not None:
+        all_data_dims = all_data_dims.union(set(weights.dims))
+
+    # all_scoring_dims is the set of dims remaining after individual scores are computed.
+    all_scoring_dims = all_data_dims.copy()
+
+    # Handle error conditions related to specified dimensions
+    if preserve_dims is not None and reduce_dims is not None:
+        raise ValueError(ERROR_OVERSPECIFIED_PRESERVE_REDUCE)
+
+    specified_dims = preserve_dims or reduce_dims
+
+    if specified_dims == "all":
+        if "all" in all_data_dims:
+            warnings.warn(WARN_ALL_DATA_CONFLICT_MSG)
+    elif specified_dims is not None:
+        if isinstance(specified_dims, str):
+            specified_dims = [specified_dims]
+
+    # check that special_fcst_dims are in fcst.dims only
+    if special_fcst_dims is not None:
+        if isinstance(special_fcst_dims, str):
+            special_fcst_dims = [special_fcst_dims]
+        if not set(special_fcst_dims).issubset(set(fcst.dims)):
+            raise ValueError("`special_fcst_dims` must be a subset of `fcst` dimensions")
+        if len(set(obs.dims).intersection(set(special_fcst_dims))) > 0:
+            raise ValueError("`obs.dims` must not contain any `special_fcst_dims`")
+        if weights is not None:
+            if len(set(weights.dims).intersection(set(special_fcst_dims))) > 0:
+                raise ValueError("`weights.dims` must not contain any `special_fcst_dims`")
+        if specified_dims is not None and specified_dims != "all":
+            if len(set(specified_dims).intersection(set(special_fcst_dims))) > 0:
+                raise ValueError("`reduce_dims` and `preserve_dims` must not contain any `special_fcst_dims`")
+        # remove special_fcst_dims from all_scoring_dims
+        all_scoring_dims = all_scoring_dims.difference(set(special_fcst_dims))
+
+    if specified_dims is not None and specified_dims != "all":
+        if not set(specified_dims).issubset(all_scoring_dims):
+            if preserve_dims is not None:
+                raise ValueError(ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION2)
+            raise ValueError(ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION2)
+
+    # all errors have been captured, so now return list of dims to reduce
+    if specified_dims is None:
+        return all_scoring_dims
+    if reduce_dims is not None:
+        if reduce_dims == "all":
+            return all_scoring_dims
+        return set(specified_dims)
+    if preserve_dims == "all":
+        return set([])
+    return all_scoring_dims.difference(set(specified_dims))
+
+
 def dims_complement(data, dims=None) -> list[str]:
     """Returns the complement of data.dims and dims
 
@@ -215,3 +334,27 @@ def check_dims(xr_data: XarrayLike, expected_dims: Sequence[str], mode: Optional
                     f"Dimensions {list(xr_data[data_var].dims)} of data variable "
                     f"'{data_var}' are not {mode} to the dimensions {sorted(dims_set)}"
                 )
+
+
+def tmp_coord_name(xr_data: xr.DataArray, count=1) -> str:
+    """
+    Generates temporary coordinate names that are not among the coordinate or dimension
+    names of `xr_data`.
+
+    Args:
+        xr_data: Input xarray data array
+        count: Number of unique names to generate
+
+    Returns:
+        If count = 1, a string which is the concatenation of 'new' with all coordinate and
+        dimension names in the input array. (this is the default)
+        If count > 1, a list of such strings, each unique from one another
+    """
+    all_names = ["new"] + list(xr_data.dims) + list(xr_data.coords)
+    result = "".join(all_names)
+
+    if count == 1:
+        return result
+
+    results = [str(i) + result for i in range(count)]
+    return results
diff --git a/tests/probabilty/crps_test_data.py b/tests/probabilty/crps_test_data.py
@@ -562,3 +562,33 @@
 )
 
 EXP_CRPS_BD2 = xr.merge([EXP_TOTAL_CRPS_BD2, EXP_UNDER_CRPS_BD2, EXP_OVER_CRPS_BD2])
+
+# test data for CRPS for ensembles
+
+DA_FCST_CRPSENS = xr.DataArray(
+    data=[[0.0, 4, 3, 7], [1, -1, 2, 4], [0, 1, 4, np.nan], [2, 3, 4, 1], [2, np.nan, np.nan, np.nan]],
+    dims=["stn", "ens_member"],
+    coords={"stn": [101, 102, 103, 104, 105], "ens_member": [1, 2, 3, 4]},
+)
+DA_OBS_CRPSENS = xr.DataArray(data=[2.0, 3, 1, np.nan, 4], dims=["stn"], coords={"stn": [101, 102, 103, 104, 105]})
+DA_WT_CRPSENS = xr.DataArray(data=[1, 2, 1, 0, 2], dims=["stn"], coords={"stn": [101, 102, 103, 104, 105]})
+
+# first and second (spread) terms from crps for ensembles, "ecdf" and "fair" methods
+FIRST_TERM = xr.DataArray(
+    data=[10 / 4, 8 / 4, 4 / 3, np.nan, 2], dims=["stn"], coords={"stn": [101, 102, 103, 104, 105]}
+)
+SPREAD_ECDF = xr.DataArray(
+    data=[(14 + 8 + 8 + 14) / 32, (6 + 10 + 6 + 10) / 32, (5 + 4 + 7) / 18, np.nan, 0],
+    dims=["stn"],
+    coords={"stn": [101, 102, 103, 104, 105]},
+)
+SPREAD_FAIR = xr.DataArray(
+    data=[(14 + 8 + 8 + 14) / 24, (6 + 10 + 6 + 10) / 24, (5 + 4 + 7) / 12, np.nan, np.nan],
+    dims=["stn"],
+    coords={"stn": [101, 102, 103, 104, 105]},
+)
+
+# expected results
+EXP_CRPSENS_ECDF = FIRST_TERM - SPREAD_ECDF
+EXP_CRPSENS_FAIR = FIRST_TERM - SPREAD_FAIR
+EXP_CRPSENS_WT = (EXP_CRPSENS_ECDF * DA_WT_CRPSENS).mean("stn")