Skip to content

Commit

Permalink
docstring for crsp_for_ensembles
Browse files Browse the repository at this point in the history
  • Loading branch information
rob-taggart committed Dec 12, 2023
1 parent 49b7b79 commit c560765
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 16 deletions.
53 changes: 42 additions & 11 deletions src/scores/probability/crps_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,21 +749,52 @@ def crps_for_ensemble(
weights: XarrayLike = None,
) -> XarrayLike:
"""
Calculates CRPS using the formula CRPS(F, Y) = E|X - Y| - E|X - X'|/2
where X, X' are independent samples of the predictive distribution F and Y is the observation (possibly unknown).
Samples from F and Y are drawn from the fcst_sample_dim and obs_sample_dim respectively.
Other dimensions are broadcast using xr broadcast rules.
Calculates the continuous ranked probability score (CRPS) given an ensemble of forecasts.
An ensemble of forecasts can also be thought of as a random sample from the predictive
distribution.
Given an observation y, and ensemble member values {x_i} (for 1 <= i <= M), the CRPS is
calculated by the formaula
CRPS({x_i}, y) = (1 / M) * sum(|x_i - y_i|) - (1 / 2 * K) * sum(|x_i - x_j|),
where the first sum is iterated over 1 <= i <= M and the second sum is iterated over
1 <= i <= M and 1 <= j <= M.
The value of the constant K in this formula depends on the method.
- If `method="ecdf"` then K = M ** 2. In this case the CRPS value returned is
the exact CRPS value for the emprical cumulation distribution function
constructed using the ensemble values.
- If `method="fair"` then K = M * (M - 1). In this case the CRPS value returned
is the approximated CRPS where the ensemble values can be interpreted as a
random sample from the underlying predictive distribution. This interpretation
stems from the formula CRPS(F, Y) = E|X - Y| - E|X - X'|/2, where X and X'
are independent samples of the predictive distribution F, Y is the observation
(possibly unknown) and E denotes the expectation. This choice of K gives an
unbiased estimate for the second expectation.
Args:
fcst: Forecast data.
obs: Observation data
weights: Weights for calculating a weighted mean of scores
fcst: Forecast data. Must have a dimension `ensemble_member_dim`.
obs: Observation data.
weights: Weights for calculating a weighted mean of individual scores.
ensemble_member_dim: the dimension that specifies the ensemble member or the sample
from the predictive distribution.
method: Either "ecdf" or "fair".
reduce_dims: Dimensions to reduce. Can be "all" to reduce all dimensions.
preserve_dims: Dimensions to preserve. Can be "all" to preserve all dimensions.
special_fcst_dims: Dimension(s) in `fcst` that are reduced to calculate individual scores.
Must not appear as a dimension in `obs`, `weights`, `reduce_dims` or `preserve_dims`.
e.g. the ensemble member dimension if calculating CRPS for ensembles, or the
threshold dimension of calculating CRPS for CDFs.
Returns:
xarray object of (weighted mean) CRPS values.
Raises:
ValueError: when method is not one of "ecdf" or "fair".
References:
- C. Ferro (2014), "Fair scores for ensemble forecasts", Q J R Meteorol Soc
140(683):1917-1923.
- T. Gneiting T and A. Raftery (2007), "Strictly proper scoring rules, prediction,
and estimation", J Am Stat Assoc, 102(477):359-37.
- M. Zamo and P. Naveau (2018), "Estimation of the Continuous Ranked Probability
Score with Limited Information and Applications to Ensemble Weather Forecasts",
Math Geosci 50:209-234, https://doi.org/10.1007/s11004-017-9709-7
"""
if method not in ["ecdf", "fair"]:
raise ValueError("`method` must be one of 'ecdf' or 'fair'")
Expand Down
18 changes: 15 additions & 3 deletions src/scores/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,23 @@
It is ambiguous how to proceed therefore an exception has been raised instead.
"""

ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION2 = """
You are requesting to preserve a dimension which does not appear in your data
(fcst, obs or weights). It is ambiguous how to proceed therefore an exception has been
raised instead.
"""

ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION = """
You are requesting to reduce a dimension which does not appear in your data (fcst or obs).
It is ambiguous how to proceed therefore an exception has been raised instead.
"""

ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION2 = """
You are requesting to reduce a dimension which does not appear in your data
(fcst, obs or weights). It is ambiguous how to proceed therefore an exception has been
raised instead.
"""

ERROR_OVERSPECIFIED_PRESERVE_REDUCE = """
You have specified both preserve_dims and reduce_dims. This method doesn't know how
to properly interpret that, therefore an exception has been raised.
Expand Down Expand Up @@ -121,7 +133,7 @@ def gather_dimensions2(
special_fcst_dims: FlexibleDimensionTypes = None,
) -> set[Hashable]:
"""
Performs a standard dimensions check for inputs of a function that calculates (mean) scores.
Performs standard dimensions checks for inputs of functions that calculate (mean) scores.
Returns a set of the dimensions to reduce.
Args:
Expand Down Expand Up @@ -186,8 +198,8 @@ def gather_dimensions2(
if specified_dims is not None and specified_dims != "all":
if not set(specified_dims).issubset(all_scoring_dims):
if preserve_dims is not None:
raise ValueError(ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION)
raise ValueError(ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION)
raise ValueError(ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION2)
raise ValueError(ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION2)

# all errors have been captured, so now return list of dims to reduce
if specified_dims is None:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ def test_gather_dimensions_exceptions():
None,
["blue", "yellow"],
None,
utils.ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION,
utils.ERROR_SPECIFIED_NONPRESENT_PRESERVE_DIMENSION2,
),
(
utils_test_data.DA_RGB,
Expand All @@ -587,7 +587,7 @@ def test_gather_dimensions_exceptions():
"yellow",
None,
"blue",
utils.ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION,
utils.ERROR_SPECIFIED_NONPRESENT_REDUCE_DIMENSION2,
),
],
)
Expand Down

0 comments on commit c560765

Please sign in to comment.