From b5e89be2dc7ec761fd1bf6eb21691fc10590c95b Mon Sep 17 00:00:00 2001 From: Mark Mikofski Date: Fri, 16 Aug 2024 12:42:42 -0700 Subject: [PATCH] coerce and rotate pvgis TMY data to desired tz and year (#2138) * coerce and rotate pvgis TMY data to desired tz and year - add private function `_coerce_and_rotate_pvgis()` - add `utc_offset` and `coerce_year` params to docstring for `get_pvgis_tmy` - call private function if `utc_offset` is not zero * test get_pvgis_tmy_coerce_year check if utc_offset and coerce_year work as expected * fix flake8 in test_pvgis_coerce_year - remove whitespace - shorter lines * remove iloc for index in test pvgis coerce - incorrect syntax for indices * deal with leap year in pvgis when coercing - if february is a leap year, when shifting tz, causes issues - so replace february year with non-leap year * fix space around operator in coerce pvgis - also fix use ts for timestamp when fixing feb leap year * fix pd.Timestamp in pvgis coerce year - lower case "s" not TimeStamp * Update v0.11.1 what's new for coerce pvgis tmy - add description and links to issue/pr * replace year and tzinfo in pvgis_tmy coerce year - also use np.roll - also make new index and dataframe instead of altering original - removes need to sanitize original index February for leap year - remove calendar import but add numpy and pytz - code much simpler shorter, easier to read * remove unused imports from pvgis.py for flake8 * change private function name to _coerce_and_roll_pvgis_tmy * spot check rolled pvgis TMY values after converting tz - fix Turin is actually CET which is UTC+1 - be DRY so use variables for test year and tz constants, versus WET and hardcoded - check tz unchanged if default zero utc_offset - use _ output args instead of indexing data[0] - add comments * Update utc_offset description - explain setting utc_offset will roll data to start at Jan 1 midnight * change coerce_year and utc_offset defaults to None in pvgis TMY - update arg docstrings - allow 
user to coerce year even if utc_offset is None or zero - use 1990 as default if utc_offset is not None or zero, but coerce_year was unspecified - add warning comment to be explicit and test identity to avoid unexpected implicit booleanness * rename roll_utc_offset in get_pvgis_tmy - refactor utc_offset everywhere including comments and docstring - add additional test to coerce year even if utc offset is zero or none - change tzname to 'UTC' (versus Etc/GMT or Etc/GMT+0) if replacing with zero utc offset * Update pvlib/iotools/pvgis.py with suggestions use "optional" vs. "default None" per #1574 Co-authored-by: Kevin Anderson * Update docs/sphinx/source/whatsnew/v0.11.1.rst rename argument "roll_utc_offset" in whatsnew Co-authored-by: Kevin Anderson * rename _coerce_and_roll_tmy, remove 'pvgis' * rename index with new tz in coerce pvgis tmy * allow tz of None in _coerce_and_roll_tmy - treat tz=None as UTC - allows get_pvgis_tmy to be simpler - remove unnecessary comments * clarify input tmy_data is UTC... - ... in docstring of private function pvgis._coerce_and_roll_tmy() - rename tmy_data - name new_index explicitly using pd.DatetimeIndex() * fix flake8 in _coerce_and_roll_tmy --------- Co-authored-by: Kevin Anderson Co-authored-by: Adam R. Jensen <39184289+AdamRJensen@users.noreply.github.com> --- docs/sphinx/source/whatsnew/v0.11.1.rst | 5 +++ pvlib/iotools/pvgis.py | 42 ++++++++++++++++++++-- pvlib/tests/iotools/test_pvgis.py | 47 +++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 3 deletions(-) diff --git a/docs/sphinx/source/whatsnew/v0.11.1.rst b/docs/sphinx/source/whatsnew/v0.11.1.rst index 89c17aadbb..72d480e32d 100644 --- a/docs/sphinx/source/whatsnew/v0.11.1.rst +++ b/docs/sphinx/source/whatsnew/v0.11.1.rst @@ -19,6 +19,10 @@ Enhancements * Add new parameters for min/max absolute air mass to :py:func:`pvlib.spectrum.spectral_factor_firstsolar`. 
(:issue:`2086`, :pull:`2100`) +* Add ``roll_utc_offset`` and ``coerce_year`` arguments to + :py:func:`pvlib.iotools.get_pvgis_tmy` to allow user to specify time zone, + rotate indices of TMY to begin at midnight, and force indices to desired + year. (:issue:`2139`, :pull:`2138`) * Restructured the pvlib/spectrum folder by breaking up the contents of pvlib/spectrum/mismatch.py into pvlib/spectrum/mismatch.py, pvlib/spectrum/irradiance.py, and @@ -62,5 +66,6 @@ Contributors * Leonardo Micheli (:ghuser:`lmicheli`) * Echedey Luis (:ghuser:`echedey-ls`) * Rajiv Daxini (:ghuser:`RDaxini`) +* Mark A. Mikofski (:ghuser:`mikofski`) * Ben Pierce (:ghuser:`bgpierc`) * Jose Meza (:ghuser:`JoseMezaMendieta`) diff --git a/pvlib/iotools/pvgis.py b/pvlib/iotools/pvgis.py index 06986bf2e5..fbfcf55e33 100644 --- a/pvlib/iotools/pvgis.py +++ b/pvlib/iotools/pvgis.py @@ -18,10 +18,10 @@ import json from pathlib import Path import requests +import numpy as np import pandas as pd +import pytz from pvlib.iotools import read_epw, parse_epw -import warnings -from pvlib._deprecation import pvlibDeprecationWarning URL = 'https://re.jrc.ec.europa.eu/api/' @@ -390,9 +390,33 @@ def read_pvgis_hourly(filename, pvgis_format=None, map_variables=True): raise ValueError(err_msg) +def _coerce_and_roll_tmy(tmy_data, tz, year): + """ + Assumes ``tmy_data`` input is UTC, converts from UTC to ``tz``, rolls + dataframe so timeseries starts at midnight, and forces all indices to + ``year``. Only works for integer ``tz``, but ``None`` and ``False`` are + re-interpreted as zero / UTC. 
+ """ + if tz: + tzname = pytz.timezone(f'Etc/GMT{-tz:+d}') + else: + tz = 0 + tzname = pytz.timezone('UTC') + new_index = pd.DatetimeIndex([ + timestamp.replace(year=year, tzinfo=tzname) + for timestamp in tmy_data.index], + name=f'time({tzname})') + new_tmy_data = pd.DataFrame( + np.roll(tmy_data, tz, axis=0), + columns=tmy_data.columns, + index=new_index) + return new_tmy_data + + def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True, userhorizon=None, startyear=None, endyear=None, - map_variables=True, url=URL, timeout=30): + map_variables=True, url=URL, timeout=30, + roll_utc_offset=None, coerce_year=None): """ Get TMY data from PVGIS. @@ -424,6 +448,13 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True, base url of PVGIS API, append ``tmy`` to get TMY endpoint timeout : int, default 30 time in seconds to wait for server response before timeout + roll_utc_offset: int, optional + Use to specify a time zone other than the default UTC zero and roll + dataframe by ``roll_utc_offset`` so it starts at midnight on January + 1st. Ignored if ``None``, otherwise will force year to ``coerce_year``. + coerce_year: int, optional + Use to force indices to desired year. Will default to 1990 if + ``coerce_year`` is not specified, but ``roll_utc_offset`` is specified. 
Returns ------- @@ -510,6 +541,11 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True, if map_variables: data = data.rename(columns=VARIABLE_MAP) + if not (roll_utc_offset is None and coerce_year is None): + # roll_utc_offset is specified, but coerce_year isn't + coerce_year = coerce_year or 1990 + data = _coerce_and_roll_tmy(data, roll_utc_offset, coerce_year) + return data, months_selected, inputs, meta diff --git a/pvlib/tests/iotools/test_pvgis.py b/pvlib/tests/iotools/test_pvgis.py index 45c4cde46b..728e6789e8 100644 --- a/pvlib/tests/iotools/test_pvgis.py +++ b/pvlib/tests/iotools/test_pvgis.py @@ -435,6 +435,53 @@ def _compare_pvgis_tmy_basic(expected, meta_expected, pvgis_data): assert np.allclose(data[outvar], expected[outvar]) +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_pvgis_tmy_coerce_year(): + """test utc_offset and coerce_year work as expected""" + base_case, _, _, _ = get_pvgis_tmy(45, 8) # Turin + assert str(base_case.index.tz) == 'UTC' + assert base_case.index.name == 'time(UTC)' + noon_test_data = [ + base_case[base_case.index.month == m].iloc[12] + for m in range(1, 13)] + cet_tz = 1 # Turin time is CET + cet_name = 'Etc/GMT-1' + # check indices of rolled data after converting timezone + pvgis_data, _, _, _ = get_pvgis_tmy(45, 8, roll_utc_offset=cet_tz) + jan1_midnight = pd.Timestamp('1990-01-01 00:00:00', tz=cet_name) + dec31_midnight = pd.Timestamp('1990-12-31 23:00:00', tz=cet_name) + assert pvgis_data.index[0] == jan1_midnight + assert pvgis_data.index[-1] == dec31_midnight + assert pvgis_data.index.name == f'time({cet_name})' + # spot check rolled data matches original + for m, test_case in enumerate(noon_test_data): + expected = pvgis_data[pvgis_data.index.month == m+1].iloc[12+cet_tz] + assert all(test_case == expected) + # repeat tests with year coerced + test_yr = 2021 + pvgis_data, _, _, _ = get_pvgis_tmy( + 45, 8, roll_utc_offset=cet_tz, 
coerce_year=test_yr) + jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00:00:00', tz=cet_name) + dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23:00:00', tz=cet_name) + assert pvgis_data.index[0] == jan1_midnight + assert pvgis_data.index[-1] == dec31_midnight + assert pvgis_data.index.name == f'time({cet_name})' + for m, test_case in enumerate(noon_test_data): + expected = pvgis_data[pvgis_data.index.month == m+1].iloc[12+cet_tz] + assert all(test_case == expected) + # repeat tests with year coerced but utc offset none or zero + pvgis_data, _, _, _ = get_pvgis_tmy(45, 8, coerce_year=test_yr) + jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00:00:00', tz='UTC') + dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23:00:00', tz='UTC') + assert pvgis_data.index[0] == jan1_midnight + assert pvgis_data.index[-1] == dec31_midnight + assert pvgis_data.index.name == 'time(UTC)' + for m, test_case in enumerate(noon_test_data): + expected = pvgis_data[pvgis_data.index.month == m+1].iloc[12] + assert all(test_case == expected) + + @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_pvgis_tmy_csv(expected, month_year_expected, inputs_expected,