nci · tennlee · Sep 10, 2024 · Aug 23, 2024 · Aug 23, 2024 · Aug 23, 2024
diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ Here is a **curated selection** of the metrics, tools and statistical tests incl
 |                       	| **Description** 	| **Selection of Included Functions** 	|
 |-----------------------	|-----------------	|--------------	|
 | **[Continuous](https://scores.readthedocs.io/en/stable/included.html#continuous)**        	|Scores for evaluating single-valued continuous forecasts.                  	|Mean Absolute Error (MAE), Mean Squared Error (MSE), Root Mean Squared Error (RMSE), Additive Bias, Multiplicative Bias, Pearson's Correlation Coefficient, Flip-Flop Index, Quantile Loss, Murphy Score, families of consistent scoring functions for quantiles and expectiles, Threshold Weighted Squared Error, Threshold Weighted Quantile Score, Threshold Weighted Absolute Error, Threshold Weighted Expectile Score, Threshold Weighted Huber Loss.             	|
-| **[Probability](https://scores.readthedocs.io/en/stable/included.html#probability)**       	|Scores for evaluating forecasts that are expressed as predictive distributions, ensembles, and probabilities of binary events.                 	|Brier Score, Continuous Ranked Probability Score (CRPS) for Cumulative Density Function (CDF), Threshold weighted CRPS for CDF, CRPS for ensembles, Receiver Operating Characteristic (ROC), Isotonic Regression (reliability diagrams).              	|
+| **[Probability](https://scores.readthedocs.io/en/stable/included.html#probability)**       	|Scores for evaluating forecasts that are expressed as predictive distributions, ensembles, and probabilities of binary events.                 	|Brier Score, Continuous Ranked Probability Score (CRPS) for Cumulative Density Function (CDF), Threshold weighted CRPS for CDF, CRPS for ensembles, Threshold weighted CRPS for ensembles, Receiver Operating Characteristic (ROC), Isotonic Regression (reliability diagrams).              	|
 | **[Categorical](https://scores.readthedocs.io/en/stable/included.html#categorical)**       	|Scores (including contingency table metrics) for evaluating forecasts of categories.                	|Probability of Detection (POD), False Alarm Ratio (FAR), Probability of False Detection (POFD), Success Ratio, Accuracy, Peirce's Skill Score, Critical Success Index (CSI), Gilbert Skill Score, Heidke Skill Score, Odds Ratio, Odds Ratio Skill Score, F1 score, Symmetric Extremal Dependence Index, FIxed Risk Multicategorical (FIRM) Score.               	|
 | **[Spatial](https://scores.readthedocs.io/en/stable/included.html#spatial)** 	|Scores that take into account spatial structure.                 	|Fractions Skill Score.              	|
 | **[Statistical Tests](https://scores.readthedocs.io/en/stable/included.html#statistical-tests)** 	|Tools to conduct statistical tests and generate confidence intervals.                 	|Diebold Mariano.              	|

diff --git a/docs/api.md b/docs/api.md
@@ -40,6 +40,8 @@
 .. autofunction:: scores.probability.crps_step_threshold_weight
 .. autofunction:: scores.probability.crps_cdf_brier_decomposition
 .. autofunction:: scores.probability.crps_for_ensemble
+.. autofunction:: scores.probability.twcrps_for_ensemble
+.. autofunction:: scores.probability.tail_twcrps_for_ensemble
 .. autofunction:: scores.probability.murphy_score
 .. autofunction:: scores.probability.murphy_thetas
 .. autofunction:: scores.probability.roc_curve_data

diff --git a/docs/included.md b/docs/included.md
@@ -219,7 +219,15 @@
 * - Reliability Diagram, *see Isotonic Regression*
   - &mdash;
   - &mdash;
-  - &mdash;  
+  - &mdash;
+* - Tail Threshold Weighted Continuous Ranked Probability Score (twCRPS) for Ensembles
+  - [API](api.md#scores.probability.tail_twcrps_for_ensemble)   
+  - &mdash;
+  - [Allen et al. (2023)](https://doi.org/10.1137/22M1532184) 
+* - Threshold Weighted Continuous Ranked Probability Score (twCRPS) for Ensembles
+  - [API](api.md#scores.probability.twcrps_for_ensemble)   
+  - &mdash;
+  - [Allen et al. (2023)](https://doi.org/10.1137/22M1532184) 
 ```
 
 ## Categorical

diff --git a/src/scores/probability/__init__.py b/src/scores/probability/__init__.py
@@ -10,6 +10,8 @@
     crps_cdf_brier_decomposition,
     crps_for_ensemble,
     crps_step_threshold_weight,
+    tail_twcrps_for_ensemble,
+    twcrps_for_ensemble,
 )
 from scores.probability.roc_impl import roc_curve_data
 from scores.processing.isoreg_impl import isotonic_fit
@@ -25,4 +27,6 @@
     "roc_curve_data",
     "isotonic_fit",
     "crps_step_threshold_weight",
+    "twcrps_for_ensemble",
+    "tail_twcrps_for_ensemble",
 ]
diff --git a/src/scores/probability/crps_impl.py b/src/scores/probability/crps_impl.py
@@ -5,7 +5,7 @@
 """
 
 from collections.abc import Iterable
-from typing import Literal, Optional, Sequence
+from typing import Callable, Literal, Optional, Sequence, Union
 
 import numpy as np
 import pandas as pd
@@ -21,6 +21,7 @@
     observed_cdf,
     propagate_nan,
 )
+from scores.typing import XarrayLike
 
 
 # pylint: disable=too-many-arguments
@@ -770,15 +771,15 @@ def crps_step_threshold_weight(
 
 
 def crps_for_ensemble(
-    fcst: xr.DataArray,
-    obs: xr.DataArray,
+    fcst: XarrayLike,
+    obs: XarrayLike,
     ensemble_member_dim: str,
     *,  # Force keywords arguments to be keyword-only
     method: Literal["ecdf", "fair"] = "ecdf",
     reduce_dims: Optional[Sequence[str]] = None,
     preserve_dims: Optional[Sequence[str]] = None,
-    weights: Optional[xr.DataArray] = None,
-) -> xr.DataArray:
+    weights: Optional[XarrayLike] = None,
+) -> XarrayLike:
     """Calculates the CRPS probabilistic metric given ensemble input.
 
     Calculates the continuous ranked probability score (CRPS) given an ensemble of forecasts.
@@ -825,6 +826,8 @@ def crps_for_ensemble(
 
     See also:
         :py:func:`scores.probability.crps_cdf`
+        :py:func:`twcrps_for_ensemble`
+        :py:func:`tail_twcrps_for_ensemble`
 
     References:
         - C. Ferro (2014), "Fair scores for ensemble forecasts", Quarterly Journal of the \
@@ -872,3 +875,194 @@ def crps_for_ensemble(
     result = scores.functions.apply_weights(result, weights=weights).mean(dim=dims_for_mean)  # type: ignore
 
     return result  # type: ignore
+
+
+def twcrps_for_ensemble(
+    fcst: XarrayLike,
+    obs: XarrayLike,
+    ensemble_member_dim: str,
+    v_func: Callable[[XarrayLike], XarrayLike],
+    *,  # Force keywords arguments to be keyword-only
+    method: Literal["ecdf", "fair"] = "ecdf",
+    reduce_dims: Optional[Sequence[str]] = None,
+    preserve_dims: Optional[Sequence[str]] = None,
+    weights: Optional[XarrayLike] = None,
+) -> xr.DataArray:
+    """
+    Calculates the threshold weighted continuous ranked probability score (twCRPS) given
+    ensemble input using a chaing function ``vfunc``. An ensemble of forecasts can also
+    be thought of as a random sample from the predictive distribution.
+
+    The twCRPS is calculated by the formula
+
+
+    .. math::
+        \\text{CRPS}(F, y) = \\mathbb{E}_F \\left| v(X) - v(y) \\right| - \\frac{1}{2} \\mathbb{E}_F \\left| v(X) - v(X') \\right|,
+
+    where :math:`X` and :math:`X'` are independent samples of the predictive distribution :math:`F`,
+    :math:`y` is the observation, and :math:`v` is a 'chaining function'.
+
+    The chaining function :math:`v` is the antiderivative of the weight function :math:`w`, which is a
+    continous, non-negative function that assigns a weight to each threshold value. For example, if we
+    wanted to assign a weight of 1 to thresholds above threshold :math:`t` and a weight of 0 to
+    thresholds below :math:`t`, our weight function would be :math:`w(x) = \\mathbb{1}{(x > t)}`,
+    where :math:`\\mathbb{1}` is the indicator function which returns a value of 1 if the condition
+    is true and 0 otherwise. The chaining function would then be :math:`v(x) = \\text{max}(x, t)`.
+
+    The ensemble representation of the emperical twCRPS is
+
+
+    .. math::
+        \\text{twCRPS}(F_{\\text{ens}}, y; v) = \\frac{1}{M} \\sum_{m=1}^{M} \\left| v(x_m) - v(y) \\right| -
+        \\frac{1}{2M^2} \\sum_{m=1}^{M} \\sum_{j=1}^{M} \\left| v(x_m) - v(x_j) \\right|,
+
+    where :math:`M` is the number of ensemble members.
+
+    Args:
+        fcst: Forecast data. Must have a dimension `ensemble_member_dim`.
+        obs: Observation data.
+        ensemble_member_dim: the dimension that specifies the ensemble member or the sample
+            from the predictive distribution.
+        v_func: the chaining function.
+        method: Either "ecdf" or "fair".
+        reduce_dims: Dimensions to reduce. Can be "all" to reduce all dimensions.
+        preserve_dims: Dimensions to preserve. Can be "all" to preserve all dimensions.
+        weights: Weights for calculating a weighted mean of individual scores.
+
+    Returns:
+        xarray object of twCRPS values.
+
+    Raises:
+        ValueError: when method is not one of "ecdf" or "fair".
+
+    Notes:
+        Chaining functions can be created to vary the weights across given dimensions
+        such as varying the weights by climatological values.
+
+    References:
+        Allen, S., Ginsbourger, D., & Ziegel, J. (2023). Evaluating forecasts for high-impact
+        events using transformed kernel scores. SIAM/ASA Journal on Uncertainty
+        Quantification, 11(3), 906-940. https://doi.org/10.1137/22M1532184
+
+    See also:
+        :py:func:`scores.probability.crps_for_ensemble`
+        :py:func:`tail_twcrps_for_ensemble`
+        :py:func:`scores.probability.crps_cdf`
+
+
+    Examples:
+        Calculate the twCRPS for an ensemble of forecasts where the chaining function is the
+        return is derive from a weight function that assigns a weight of 1 to thresholds above
+        0.5 and a weight of 0 to thresholds below 0.5.
+
+        >>> import numpy as np
+        >>> import xarray as xr
+        >>> from scores.probability import twcrps_for_ensemble
+        >>> fcst = xr.DataArray(np.random.rand(10, 10), dims=['time', 'ensemble'])
+        >>> obs = xr.DataArray(np.random.rand(10), dims=['time'])
+        >>> twcrps_for_ensemble(fcst, obs, 'ensemble', lambda x: np.maximum(x, 0.5))
+
+    """
+
+    fcst = v_func(fcst)
+    obs = v_func(obs)
+    result = crps_for_ensemble(
+        fcst,
+        obs,
+        ensemble_member_dim,
+        method=method,
+        reduce_dims=reduce_dims,
+        preserve_dims=preserve_dims,
+        weights=weights,
+    )
+    return result
+
+
+def tail_twcrps_for_ensemble(
+    fcst: XarrayLike,
+    obs: XarrayLike,
+    ensemble_member_dim: str,
+    threshold: Union[XarrayLike, float],
+    *,  # Force keywords arguments to be keyword-only
+    tail: Literal["upper", "lower"] = "upper",
+    method: Literal["ecdf", "fair"] = "ecdf",
+    reduce_dims: Optional[Sequence[str]] = None,
+    preserve_dims: Optional[Sequence[str]] = None,
+    weights: Optional[XarrayLike] = None,
+) -> XarrayLike:
+    """
+    Calculates the threshold weighted continuous ranked probability score (twCRPS)
+    weighted for a tail of the distribution from ensemble input.
+
+    A weight of 1 is assigned for values of the tail and a weight of 0 otherwise.
+    The threshold value of where the tail begoms is specified by the ``threshold`` argument.
+    The ``tail`` argument specifies whether the tail is the upper or lower tail.
+    For more flexible weighting options and the relevant equations, see the
+    :py:func:`twcrps_for_ensemble` function.
+
+    Args:
+        fcst: Forecast data. Must have a dimension `ensemble_member_dim`.
+        obs: Observation data.
+        ensemble_member_dim: the dimension that specifies the ensemble member or the sample
+            from the predictive distribution.
+        threshold: the threshold value for where the tail begins. It can either be a float
+            for a single threshold or an xarray object if the threshold varies across
+            dimensions (e.g., climatological values).
+        tail: the tail of the distribution to weight. Either "upper" or "lower".
+        method: Either "ecdf" or "fair".
+        reduce_dims: Dimensions to reduce. Can be "all" to reduce all dimensions.
+        preserve_dims: Dimensions to preserve. Can be "all" to preserve all dimensions.
+        weights: Weights for calculating a weighted mean of individual scores.
+
+    Returns:
+        xarray object of twCRPS values that has been weighted based on the tail.
+
+    Raises:
+        ValueError: when `tail` is not one of "upper" or "lower".
+        ValueError: when method is not one of "ecdf" or "fair".
+
+    References:
+        Allen, S., Ginsbourger, D., & Ziegel, J. (2023). Evaluating forecasts for high-impact
+        events using transformed kernel scores. SIAM/ASA Journal on Uncertainty
+        Quantification, 11(3), 906-940. https://doi.org/10.1137/22M1532184
+
+    See also:
+        :py:func:`scores.probability.twcrps_for_ensemble`
+        :py:func:`scores.probability.crps_for_ensemble`
+        :py:func:`scores.probability.crps_cdf`
+
+    Examples:
+        Calculate the twCRPS for an ensemble of that assigns a weight of 1 to thresholds above
+        0.5 and a weight of 0 to thresholds below 0.5.
+
+        >>> import numpy as np
+        >>> import xarray as xr
+        >>> from scores.probability import tail_twcrps_for_ensemble
+        >>> fcst = xr.DataArray(np.random.rand(10, 10), dims=['time', 'ensemble'])
+        >>> obs = xr.DataArray(np.random.rand(10), dims=['time'])
+        >>> tail_twcrps_for_ensemble(fcst, obs, 'ensemble', 0.5, tail='upper')
+
+    """
+    if tail not in ["upper", "lower"]:
+        raise ValueError(f"'{tail}' is not one of 'upper' or 'lower'")
+    if tail == "upper":
+
+        def _vfunc(x):
+            return np.maximum(x, threshold)
+
+    else:
+
+        def _vfunc(x):
+            return np.minimum(x, threshold)
+
+    result = twcrps_for_ensemble(
+        fcst,
+        obs,
+        ensemble_member_dim,
+        _vfunc,
+        method=method,
+        reduce_dims=reduce_dims,
+        preserve_dims=preserve_dims,
+        weights=weights,
+    )
+    return result