diff --git a/docs/paper.bib b/docs/paper.bib index 6f636923..538f0526 100644 --- a/docs/paper.bib +++ b/docs/paper.bib @@ -46,7 +46,7 @@ @article{Ehm:2016 urldate = {2023-09-06}, abstract = {In the practice of point prediction, it is desirable that forecasters receive a directive in the form of a statistical functional. For example, forecasters might be asked to report the mean or a quantile of their predictive distributions. When evaluating and comparing competing forecasts, it is then critical that the scoring function used for these purposes be consistent for the functional at hand, in the sense that the expected score is minimized when following the directive. We show that any scoring function that is consistent for a quantile or an expectile functional can be represented as a mixture of elementary or extremal scoring functions that form a linearly parameterized family. Scoring functions for the mean value and probability forecasts of binary events constitute important examples. The extremal scoring functions admit appealing economic interpretations of quantiles and expectiles in the context of betting and investment problems. The Choquet-type mixture representations give rise to simple checks of whether a forecast dominates another in the sense that it is preferable under any consistent scoring function. In empirical settings it suffices to compare the average scores for only a finite number of extremal elements. 
Plots of the average scores with respect to the extremal scoring functions, which we call Murphy diagrams, permit detailed comparisons of the relative merits of competing forecasts.} } -@article{Taggart:2022, +@article{Taggart:2022a, title = {A scoring framework for tiered warnings and multicategorical forecasts based on fixed risk measures}, author = {Taggart, Robert and Loveday, Nicholas and Griffiths, Deryn}, year = 2022, @@ -132,8 +132,7 @@ @Article{Foley:2020 year = {2020}, publisher = {American Meteorological Society} } - -@Article{Taggart:2022a, +@Article{Taggart:2022b, title = {Evaluation of point forecasts for extreme events using consistent scoring functions}, author = {Taggart, Robert}, journal = {Quarterly Journal of the Royal Meteorological Society}, @@ -143,7 +142,7 @@ @Article{Taggart:2022a year = {2022}, publisher = {Wiley Online Library} } -@Article{Taggart:2022b, +@Article{Taggart:2022c, title = {Point forecasting and forecast evaluation with generalized Huber loss}, author = {Taggart, Robert J}, journal = {Electronic Journal of Statistics}, diff --git a/docs/paper.md b/docs/paper.md index 6d1a6661..75ac6578 100644 --- a/docs/paper.md +++ b/docs/paper.md @@ -28,7 +28,7 @@ bibliography: paper.bib All of the scores and metrics in this package have undergone a thorough statistical and scientific review. Every score has a companion Jupyter Notebook demonstrating its use in practice. -At the time of writing, the scores contained in this package are: MSE, MAE, RMSE, FIRM [@Taggart:2022], CRPS, the FlipFlop index [@Griffiths:2019] and the Murphy score [@Ehm:2016]. It also includes the Diebold-Mariano statistical test [@Diebold:1995] with both the [@Harvey:1997] and [@Hering:2011] modifications. +At the time of writing, the scores contained in this package are: MSE, MAE, RMSE, FIRM [@Taggart:2022a], CRPS, the FlipFlop index [@Griffiths:2019] and the Murphy score [@Ehm:2016]. 
It also includes the Diebold-Mariano statistical test [@Diebold:1995] with both the [@Harvey:1997] and [@Hering:2011] modifications. # Statement of need @@ -46,7 +46,7 @@ The research purpose of this software is (a) to mathematically verify and valida ## Related Works -`scores` has arisen from, and now supports, the JIVE verification system, described by [@Loveday:2023]. `scores` includes the mathematical functions from this package and is intended to modularise these components. The JIVE metrics have been used by [@Griffiths:2017], [@Foley:2020], [@Taggart:2022a] and [Taggart:2022b] +`scores` has arisen from, and now supports, the JIVE verification system, described by [@Loveday:2023]. `scores` includes the mathematical functions from this package and is intended to modularise these components. The JIVE metrics have been used by [@Griffiths:2017], [@Foley:2020], [@Taggart:2022b] and [@Taggart:2022c] `climpred` [@Brady:2021] provides some related functionality and provides many of the same scores. `climpred` does not contain some of the novel functions contained within `scores`, and at the same time makes some design choices specifically associated with climate modelling which do not generalise as effectively to broader use cases as may be needed in some circumstances. Releasing `scores` separately allows the differing design philosophies to be considered by the community.