Skip to content

Commit

Permalink
added warning for dm tests if there are NaNs
Browse files Browse the repository at this point in the history
  • Loading branch information
nicholasloveday committed Aug 30, 2023
1 parent 41dec0f commit e4d911c
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 35 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dependencies = [
"xarray",
"pandas",
"scipy",
"bottleneck",
"bottleneck"
]

[project.optional-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion src/scores/stats/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
The philosphy is to import the public API during the init phase rather than leaving it to the user
"""
import scores.stats.tests
import scores.stats.tests
9 changes: 5 additions & 4 deletions src/scores/stats/tests/acovf.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@

__all__ = ["acovf"]


def _next_regular(target):
"""
Find the next regular number greater than or equal to target.
Expand Down Expand Up @@ -107,18 +108,18 @@ def acovf(x):
Estimate autocovariances.
Args:
x (array_like):
x (array_like):
Time series data. Must be 1d.
Returns:
(np.ndarray):
(np.ndarray):
The estimated autocovariances.
References:
[1] Parzen, E., 1963. On spectral analysis with missing observations
and amplitude modulation. Sankhya: The Indian Journal of
Statistics, Series A, pp.383-392.
"""
"""

xo = x - x.mean()

Expand All @@ -132,4 +133,4 @@ def acovf(x):
acov = np.fft.ifft(Frf * np.conjugate(Frf))[:nobs] / d[nobs - 1 :]
acov = acov.real

return acov
return acov
18 changes: 15 additions & 3 deletions src/scores/stats/tests/diebold_mariano_impl.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Functions for calculating a modified Diebold-Mariano test statistic
"""
import warnings
from typing import Literal

import numpy as np
Expand All @@ -11,8 +12,6 @@
from scores.utils import dims_complement




def diebold_mariano(
da_timeseries: xr.DataArray,
ts_dim: str,
Expand All @@ -34,7 +33,9 @@ def diebold_mariano(
estimates for the spectral density contribution to the test statistic. For further
details see `scores.stats.confidence_intervals.impl._dm_test_statistic`.
Prior to any calculations, NaNs are removed from each timeseries.
Prior to any calculations, NaNs are removed from each timeseries. If there are NaNs
in `da_timeseries` then a warning will occur. This is because NaNs may impact the
autocovariance calculation.
To determine the value of h for each timeseries of score differences of h-step ahead
forecasts, one may ask 'How many observations of the phenomenon will be made between
Expand Down Expand Up @@ -95,6 +96,7 @@ def diebold_mariano(
ValueError: if `h_coord` values aren't positive integers.
ValueError: if `h_coord` values aren't less than the lengths of the
timeseries after NaNs are removed.
RuntimeWarnning: if there is a NaN in diffs.
References:
- Diebold and Mariano, 'Comparing predictive accuracy', Journal of Business and
Expand Down Expand Up @@ -229,6 +231,7 @@ def _dm_test_statistic(diffs: np.ndarray, h: int, method: Literal["HG", "HLN"] =
Raises:
ValueError: if `method` is not one of "HLN" or "HG".
ValueError: if `0 < h < len(diffs)` fails after NaNs removed.
RuntimeWarnning: if there is a NaN in diffs.
References:
- Diebold and Mariano, 'Comparing predictive accuracy', Journal of Business and
Expand All @@ -241,6 +244,14 @@ def _dm_test_statistic(diffs: np.ndarray, h: int, method: Literal["HG", "HLN"] =
if method not in ["HLN", "HG"]:
raise ValueError("`method` must be one of 'HLN' or 'HG'.")

if np.isnan(np.sum(diffs)):
warnings.warn(
RuntimeWarning(
"A least one NaN value was detected in `da_timeseries`. This may impact the "
"calculation of autocovariances."
)
)

diffs = diffs[~np.isnan(diffs)]

if not 0 < h < len(diffs):
Expand Down Expand Up @@ -295,6 +306,7 @@ def _hg_method_stat(diffs: np.ndarray, h: int) -> float:
Diebold-Mariano test statistic using the HG method.
"""
from scores.stats.tests.acovf import acovf

n = len(diffs)

# use an exponential model for autocovariances of `diffs`
Expand Down
34 changes: 18 additions & 16 deletions tests/scores/stats/tests/test_acovf.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@

from scores.stats.tests.acovf import _next_regular


@pytest.mark.parametrize(
("x", "y"),
[
Expand Down Expand Up @@ -98,27 +99,28 @@
(288325195312500000, 288325195312500000),
(288325195312500000 + 1, 288555831593533440),
# power of 3 3**83
(3 ** 83 - 1, 3 ** 83),
(3 ** 83, 3 ** 83),
(3**83 - 1, 3**83),
(3**83, 3**83),
# power of 2 2**135
(2 ** 135 - 1, 2 ** 135),
(2 ** 135, 2 ** 135),
(2**135 - 1, 2**135),
(2**135, 2**135),
# power of 5 5**57
(5 ** 57 - 1, 5 ** 57),
(5 ** 57, 5 ** 57),
(5**57 - 1, 5**57),
(5**57, 5**57),
# http,//www.drdobbs.com/228700538
# 2**96 * 3**1 * 5**13
(2 ** 96 * 3 ** 1 * 5 ** 13 - 1, 2 ** 96 * 3 ** 1 * 5 ** 13),
(2 ** 96 * 3 ** 1 * 5 ** 13, 2 ** 96 * 3 ** 1 * 5 ** 13),
(2 ** 96 * 3 ** 1 * 5 ** 13 + 1, 2 ** 43 * 3 ** 11 * 5 ** 29),
(2**96 * 3**1 * 5**13 - 1, 2**96 * 3**1 * 5**13),
(2**96 * 3**1 * 5**13, 2**96 * 3**1 * 5**13),
(2**96 * 3**1 * 5**13 + 1, 2**43 * 3**11 * 5**29),
# 2**36 * 3**69 * 5**7
(2 ** 36 * 3 ** 69 * 5 ** 7 - 1, 2 ** 36 * 3 ** 69 * 5 ** 7),
(2 ** 36 * 3 ** 69 * 5 ** 7, 2 ** 36 * 3 ** 69 * 5 ** 7),
(2 ** 36 * 3 ** 69 * 5 ** 7 + 1, 2 ** 90 * 3 ** 32 * 5 ** 9),
(2**36 * 3**69 * 5**7 - 1, 2**36 * 3**69 * 5**7),
(2**36 * 3**69 * 5**7, 2**36 * 3**69 * 5**7),
(2**36 * 3**69 * 5**7 + 1, 2**90 * 3**32 * 5**9),
# 2**37 * 3**44 * 5**42
(2 ** 37 * 3 ** 44 * 5 ** 42 - 1, 2 ** 37 * 3 ** 44 * 5 ** 42),
(2 ** 37 * 3 ** 44 * 5 ** 42, 2 ** 37 * 3 ** 44 * 5 ** 42),
(2 ** 37 * 3 ** 44 * 5 ** 42 + 1, 2 ** 20 * 3 ** 106 * 5 ** 7),
])
(2**37 * 3**44 * 5**42 - 1, 2**37 * 3**44 * 5**42),
(2**37 * 3**44 * 5**42, 2**37 * 3**44 * 5**42),
(2**37 * 3**44 * 5**42 + 1, 2**20 * 3**106 * 5**7),
],
)
def test_next_regular(x, y):
assert_equal(_next_regular(x), y)
33 changes: 23 additions & 10 deletions tests/scores/stats/tests/test_diebold_mariano.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
This module contains unit tests for scores.stats.tests.diebold_mariano_impl
"""
import warnings
import numpy as np
import pytest
import xarray as xr
Expand Down Expand Up @@ -270,7 +271,6 @@ def test__hln_method_stat(diffs, h, expected):
("diffs", "h", "method", "expected"),
[
(DM_DIFFS, 2, "HLN", DM_TEST_STAT_EXP1),
(np.array([np.nan, 1, 2, 3, 4.0, np.nan]), 2, "HLN", DM_TEST_STAT_EXP1),
(np.array([1.0, 1, 1, 1]), 2, "HLN", np.nan),
(np.array([0, 0, 0, 0]), 2, "HLN", np.nan),
(DM_DIFFS2, 14, "HG", -0.1691921),
Expand All @@ -282,6 +282,15 @@ def test__dm_test_statistic(diffs, h, method, expected):
np.testing.assert_allclose(result, expected, atol=1e-5)


def test__dm_test_statistic_with_nan():
"""Tests that _dm_test_statistic returns values as expected with a NaN diffs."""
diffs = np.array([np.nan, 1, 2, 3, 4.0, np.nan])
with pytest.warns(RuntimeWarning):
result = _dm_test_statistic(diffs, 2, "HLN")
np.testing.assert_allclose(result, DM_TEST_STAT_EXP1, atol=1e-5)


@pytest.mark.filterwarnings("ignore::RuntimeWarning")
@pytest.mark.parametrize(
("diff", "h", "method", "error_msg"),
[
Expand Down Expand Up @@ -346,7 +355,10 @@ def test__dm_test_statistic_raises(diff, h, method, error_msg):
],
)
def test_diebold_mariano(distribution, expected):
"""Tests that diebold_mariano gives results as expected."""
"""
Tests that diebold_mariano gives results as expected and raises a warning
due to a NaN in the data.
"""
da_timeseries = xr.DataArray(
data=[[1, 2, 3.0, 4, np.nan], [2.0, 1, -3, -1, 0], [1.0, 1, 1, 1, 1]],
dims=["lead_day", "valid_date"],
Expand All @@ -356,12 +368,13 @@ def test_diebold_mariano(distribution, expected):
"h": ("lead_day", [2, 3, 4]),
},
)
result = diebold_mariano(
da_timeseries,
"lead_day",
"h",
method="HLN",
confidence_level=0.9,
statistic_distribution=distribution,
)
with pytest.warns(RuntimeWarning):
result = diebold_mariano(
da_timeseries,
"lead_day",
"h",
method="HLN",
confidence_level=0.9,
statistic_distribution=distribution,
)
xr.testing.assert_allclose(result, expected, atol=7)

0 comments on commit e4d911c

Please sign in to comment.