From e762e8a9b5594cb60d8a8571aae7a738cbaef0bb Mon Sep 17 00:00:00 2001 From: ajonesr <105310033+ajonesr@users.noreply.github.com> Date: Wed, 2 Aug 2023 08:33:56 -0500 Subject: [PATCH] Infer thresholds for detect_clearsky (#1784) * Add helper function to interpolate thresholds * Add infer_limits functionality in detect_clearsky * Update comments/docstrings * Add ValueError message * Fix typo * Update docstring * Add tests, update whatsnew * Update whatsnew * Add mocker test * Update whatsnew * whatsnew fix * Fix test error * Add check on length of data * Added check that optimizer succeeded * Handling output for old version of scipy * Update pvlib/clearsky.py Co-authored-by: Kevin Anderson * Update pvlib/tests/test_clearsky.py Co-authored-by: Kevin Anderson * Update pvlib/clearsky.py Co-authored-by: Kevin Anderson * Update pvlib/clearsky.py Co-authored-by: Kevin Anderson * Add suggestions from code review --------- Co-authored-by: Kevin Anderson --- docs/sphinx/source/whatsnew/v0.10.2.rst | 3 + pvlib/clearsky.py | 72 +++++++++- pvlib/data/detect_clearsky_threshold_data.csv | 126 ++++++++++++++++++ pvlib/tests/test_clearsky.py | 55 ++++++++ 4 files changed, 253 insertions(+), 3 deletions(-) create mode 100644 pvlib/data/detect_clearsky_threshold_data.csv diff --git a/docs/sphinx/source/whatsnew/v0.10.2.rst b/docs/sphinx/source/whatsnew/v0.10.2.rst index 7259f6a566..dacfe27f53 100644 --- a/docs/sphinx/source/whatsnew/v0.10.2.rst +++ b/docs/sphinx/source/whatsnew/v0.10.2.rst @@ -15,6 +15,8 @@ Enhancements :py:func:`pvlib.iotools.get_pvgis_hourly`, :py:func:`pvlib.iotools.get_cams`, :py:func:`pvlib.iotools.get_bsrn`, and :py:func:`pvlib.iotools.read_midc_raw_data_from_nrel`. (:pull:`1800`) +* Added option to infer threshold values for + :py:func:`pvlib.clearsky.detect_clearsky` (:issue:`1808`, :pull:`1784`) Bug fixes @@ -39,4 +41,5 @@ Requirements Contributors ~~~~~~~~~~~~ * Adam R. Jensen (:ghuser:`AdamRJensen`) +* Abigail Jones (:ghuser:`ajonesr`) * Taos Transue (:ghuser:`reepoi`) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index ffc7ac9b55..62318942da 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -644,8 +644,38 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H): return clear_samples -def detect_clearsky(measured, clearsky, times=None, window_length=10, - mean_diff=75, max_diff=75, +def _clearsky_get_threshold(sample_interval): + """ + Returns threshold values for kwargs in detect_clearsky. See + Table 1 in [1]. + + References + ---------- + .. [1] Jordan, D.C. and C. Hansen, "Clear-sky detection for PV + degradation analysis using multiple regression", Renewable Energy, + v209, p. 393-400, 2023. + """ + + if (sample_interval < 1 or sample_interval > 30): + raise ValueError("`infer_limits=True` can only be used for inputs \ + with time step from 1 to 30 minutes") + + data_freq = np.array([1, 5, 15, 30]) + + window_length = np.interp(sample_interval, data_freq, [50, 60, 90, 120]) + mean_diff = np.interp(sample_interval, data_freq, [75, 75, 75, 75]) + max_diff = np.interp(sample_interval, data_freq, [60, 65, 75, 90]) + lower_line_length = np.interp(sample_interval, data_freq, [-45,-45,-45,-45]) + upper_line_length = np.interp(sample_interval, data_freq, [80, 80, 80, 80]) + var_diff = np.interp(sample_interval, data_freq, [0.005, 0.01, 0.032, 0.07]) + slope_dev = np.interp(sample_interval, data_freq, [50, 60, 75, 96]) + + return (window_length, mean_diff, max_diff, lower_line_length, + upper_line_length, var_diff, slope_dev) + + +def detect_clearsky(measured, clearsky, times=None, infer_limits=False, + window_length=10, mean_diff=75, max_diff=75, lower_line_length=-5, upper_line_length=10, var_diff=0.005, slope_dev=8, max_iterations=20, return_components=False): @@ -676,6 +706,9 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, times : DatetimeIndex or None, default None. Times of measured and clearsky values. If None the index of measured will be used. + infer_limits : bool, default False + If True, does not use passed in kwargs (or defaults), but instead + interpolates these values from Table 1 in [2]_. window_length : int, default 10 Length of sliding time window in minutes. Must be greater than 2 periods. @@ -731,6 +764,9 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, .. [1] Reno, M.J. and C.W. Hansen, "Identification of periods of clear sky irradiance in time series of GHI measurements" Renewable Energy, v90, p. 520-531, 2016. + .. [2] Jordan, D.C. and C. Hansen, "Clear-sky detection for PV + degradation analysis using multiple regression", Renewable Energy, + v209, p. 393-400, 2023. :doi:`10.1016/j.renene.2023.04.035` Notes ----- @@ -773,6 +809,21 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, sample_interval, samples_per_window = \ tools._get_sample_intervals(times, window_length) + # if infer_limits, find threshold values using the sample interval + if infer_limits: + window_length, mean_diff, max_diff, lower_line_length, \ + upper_line_length, var_diff, slope_dev = \ + _clearsky_get_threshold(sample_interval) + + # recalculate samples_per_window using returned window_length + _, samples_per_window = \ + tools._get_sample_intervals(times, window_length) + + # check that we have enough data to produce a nonempty hankel matrix + if len(times) < samples_per_window: + raise ValueError(f"times has only {len(times)} entries, but it must \ + have at least {samples_per_window} entries") + # generate matrix of integers for creating windows with indexing H = hankel(np.arange(samples_per_window), np.arange(samples_per_window-1, len(times))) @@ -826,7 +877,22 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, def rmse(alpha): return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2)) - alpha = minimize_scalar(rmse).x + optimize_result = minimize_scalar(rmse) + if not optimize_result.success: + try: + message = "Optimizer exited unsuccessfully: " \ + + optimize_result.message + except AttributeError: + message = "Optimizer exited unsuccessfully: \ + No message explaining the failure was returned. \ + If you would like to see this message, please \ + update your scipy version (try version 1.8.0 \ + or beyond)." + raise RuntimeError(message) + + else: + alpha = optimize_result.x + if round(alpha*10000) == round(previous_alpha*10000): break else: diff --git a/pvlib/data/detect_clearsky_threshold_data.csv b/pvlib/data/detect_clearsky_threshold_data.csv new file mode 100644 index 0000000000..a36736b1a3 --- /dev/null +++ b/pvlib/data/detect_clearsky_threshold_data.csv @@ -0,0 +1,126 @@ +# latitude:35.04 +# longitude:-106.62 +# elevation:1619 +# window_length:10 +Time (UTC),GHI,Clear or not +4/1/2012 17:30,862.0935267144818,1 +4/1/2012 17:31,863.9568654037691,1 +4/1/2012 17:32,865.802767695237,1 +4/1/2012 17:33,867.6311939211416,1 +4/1/2012 17:34,869.4421084958154,1 +4/1/2012 17:35,871.2354749580768,1 +4/1/2012 17:36,873.0112572200139,1 +4/1/2012 17:37,874.7694195555663,1 +4/1/2012 17:38,876.5099266138993,1 +4/1/2012 17:39,878.2327434081394,1 +4/1/2012 17:40,879.9378353205582,1 +4/1/2012 17:41,881.6251681074431,1 +4/1/2012 17:42,500.0,0 +4/1/2012 17:43,300.0,0 +4/1/2012 17:44,400.0,0 +4/1/2012 17:45,888.1962370575133,1 +4/1/2012 17:46,889.7942734332919,1 +4/1/2012 17:47,891.3743529660121,1 +4/1/2012 17:48,892.9364440027008,1 +4/1/2012 17:49,894.4805152569894,1 +4/1/2012 17:50,896.0065358138269,1 +4/1/2012 17:51,897.514475133874,1 +4/1/2012 17:52,899.0043030438522,1 +4/1/2012 17:53,900.4759897479845,1 +4/1/2012 17:54,901.9295058184756,1 +4/1/2012 17:55,903.3648231563722,1 +4/1/2012 17:56,950.0,0 +4/1/2012 17:57,906.1807424805852,1 +4/1/2012 17:58,907.5612891914101,1 +4/1/2012 17:59,908.9235237269177,1 +4/1/2012 18:00,910.2674188971979,1 +4/1/2012 18:01,911.592947889838,1 +4/1/2012 18:02,912.9000842614388,1 +4/1/2012 18:03,914.1888019477013,1 +4/1/2012 18:04,915.4590752551169,1 +4/1/2012 18:05,916.7108788648916,1 +4/1/2012 18:06,917.9441886574208,1 +4/1/2012 18:07,919.1589784086842,1 +4/1/2012 18:08,920.3552247618425,1 +4/1/2012 18:09,921.5329038989687,1 +4/1/2012 18:10,922.691992377965,1 +4/1/2012 18:11,923.8324671359268,1 +4/1/2012 18:12,924.9543054818693,1 +4/1/2012 18:13,926.057485105408,1 +4/1/2012 18:14,927.141984069634,1 +4/1/2012 18:15,928.2077808145282,1 +4/1/2012 18:16,929.254854160055,1 +4/1/2012 18:17,930.2831839827653,1 +4/1/2012 18:18,931.2927484780441,1 +4/1/2012 18:19,932.2835282910601,1 +4/1/2012 18:20,933.2555037490038,1 +4/1/2012 18:21,934.2086555597849,1 +4/1/2012 18:22,935.1429648059466,1 +4/1/2012 18:23,936.058412951919,1 +4/1/2012 18:24,936.9549818381174,1 +4/1/2012 18:25,937.8326536837798,1 +4/1/2012 18:26,938.6914110895166,1 +4/1/2012 18:27,939.5312370318452,1 +4/1/2012 18:28,940.3521148697101,1 +4/1/2012 18:29,941.1540288705581,1 +4/1/2012 18:30,941.9369620746523,1 +4/1/2012 18:31,942.7008995238247,1 +4/1/2012 18:32,943.4458260928685,1 +4/1/2012 18:33,944.1717270394992,1 +4/1/2012 18:34,944.8785879996012,1 +4/1/2012 18:35,945.5663949895419,1 +4/1/2012 18:36,946.2351344081491,1 +4/1/2012 18:37,946.8847930330305,1 +4/1/2012 18:38,947.5153580226918,1 +4/1/2012 18:39,948.126816918376,1 +4/1/2012 18:40,948.7191580309192,1 +4/1/2012 18:41,949.2923688693852,1 +4/1/2012 18:42,949.8464385206038,1 +4/1/2012 18:43,950.3813560493355,1 +4/1/2012 18:44,950.8971109033968,1 +4/1/2012 18:45,951.3936929103834,1 +4/1/2012 18:46,951.8710922815745,1 +4/1/2012 18:47,952.3292996087865,1 +4/1/2012 18:48,952.7683058659376,1 +4/1/2012 18:49,953.1881024102506,1 +4/1/2012 18:50,953.5886809795984,1 +4/1/2012 18:51,953.9700339452911,1 +4/1/2012 18:52,954.3321532981175,1 +4/1/2012 18:53,954.6750321861093,1 +4/1/2012 18:54,954.9986638782349,1 +4/1/2012 18:55,955.3030420248847,1 +4/1/2012 18:56,955.5881606602495,1 +4/1/2012 18:57,955.8540142004646,1 +4/1/2012 18:58,956.1005974445337,1 +4/1/2012 18:59,956.3279055749699,1 +4/1/2012 19:00,956.5359341563872,1 +4/1/2012 19:01,956.7246791371283,1 +4/1/2012 19:02,956.8941369551687,1 +4/1/2012 19:03,957.0443040971436,1 +4/1/2012 19:04,957.175177780536,1 +4/1/2012 19:05,957.2867554853715,1 +4/1/2012 19:06,957.3790350752402,1 +4/1/2012 19:07,957.4520147966049,1 +4/1/2012 19:08,957.5056932791584,1 +4/1/2012 19:09,957.5400695359402,1 +4/1/2012 19:10,957.5551429630466,1 +4/1/2012 19:11,957.5509133398784,1 +4/1/2012 19:12,957.527380828979,1 +4/1/2012 19:13,957.4845459762313,1 +4/1/2012 19:14,957.4224096624041,1 +4/1/2012 19:15,957.3409732831614,1 +4/1/2012 19:16,957.2402384985319,1 +4/1/2012 19:17,957.1202073871192,1 +4/1/2012 19:18,956.9808824107419,1 +4/1/2012 19:19,956.8222664139458,1 +4/1/2012 19:20,956.6443626248492,1 +4/1/2012 19:21,956.4471746540574,1 +4/1/2012 19:22,956.2307064955613,1 +4/1/2012 19:23,955.9949625264852,1 +4/1/2012 19:24,955.7399475061342,1 +4/1/2012 19:25,955.4656663871413,1 +4/1/2012 19:26,955.1721250622959,1 +4/1/2012 19:27,954.8593292624192,1 +4/1/2012 19:28,954.5272852786308,1 +4/1/2012 19:29,954.175999783701,1 +4/1/2012 19:30,953.8054798341012,1 diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 37b95dcdf9..c2ef607f0f 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -533,6 +533,49 @@ def detect_clearsky_data(): return expected, cs +@pytest.fixture +def detect_clearsky_threshold_data(): + # this is (roughly) just a 2 hour period of the same data in + # detect_clearsky_data (which only spans 30 minutes) + data_file = DATA_DIR / 'detect_clearsky_threshold_data.csv' + expected = pd.read_csv( + data_file, index_col=0, parse_dates=True, comment='#') + expected = expected.tz_localize('UTC').tz_convert('Etc/GMT+7') + metadata = {} + with data_file.open() as f: + for line in f: + if line.startswith('#'): + key, value = line.strip('# \n').split(':') + metadata[key] = float(value) + else: + break + metadata['window_length'] = int(metadata['window_length']) + loc = Location(metadata['latitude'], metadata['longitude'], + altitude=metadata['elevation']) + # specify turbidity to guard against future lookup changes + cs = loc.get_clearsky(expected.index, linke_turbidity=2.658197) + return expected, cs + + +def test_clearsky_get_threshold(): + out = clearsky._clearsky_get_threshold(4.5) + expected = (58.75, 75, 64.375, -45, 80.0, 0.009375, 58.75) + assert np.allclose(out, expected) + + +def test_clearsky_get_threshold_raises_error(): + with pytest.raises(ValueError, match='can only be used for inputs'): + clearsky._clearsky_get_threshold(0.5) + + +def test_detect_clearsky_calls_threshold(mocker, detect_clearsky_threshold_data): + threshold_spy = mocker.spy(clearsky, '_clearsky_get_threshold') + expected, cs = detect_clearsky_threshold_data + threshold_actual = clearsky.detect_clearsky(expected['GHI'], cs['ghi'], + infer_limits=True) + assert threshold_spy.call_count == 1 + + def test_detect_clearsky(detect_clearsky_data): expected, cs = detect_clearsky_data clear_samples = clearsky.detect_clearsky( @@ -629,6 +672,18 @@ def test_detect_clearsky_missing_index(detect_clearsky_data): clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values) +def test_detect_clearsky_not_enough_data(detect_clearsky_data): + expected, cs = detect_clearsky_data + with pytest.raises(ValueError, match='have at least'): + clearsky.detect_clearsky(expected['GHI'], cs['ghi'], window_length=60) + + +def test_detect_clearsky_optimizer_failed(detect_clearsky_data): + expected, cs = detect_clearsky_data + with pytest.raises(RuntimeError, match='Optimizer exited unsuccessfully'): + clearsky.detect_clearsky(expected['GHI'], cs['ghi'], window_length=15) + + @pytest.fixture def detect_clearsky_helper_data(): samples_per_window = 3