def _coerce_and_roll_tmy(tmy_data, tz, year):
    """
    Shift UTC-indexed TMY data to a fixed UTC offset and coerce the year.

    Assumes ``tmy_data`` input is UTC, relabels it in the time zone given by
    ``tz``, rolls the rows by ``tz`` positions so the timeseries starts at
    midnight, and forces all indices to ``year``. Only works for integer
    ``tz``, but ``None`` and ``False`` are re-interpreted as zero / UTC.

    Parameters
    ----------
    tmy_data : pandas.DataFrame
        TMY data with a datetime index. One row per hour is assumed, since
        the rows are rotated by ``tz`` positions.
    tz : int or None
        Whole-hour offset from UTC, positive east of Greenwich.
    year : int
        Year assigned to every timestamp of the returned index.

    Returns
    -------
    pandas.DataFrame
        The rotated rows of ``tmy_data`` with the same columns and column
        dtypes, indexed by the coerced timestamps. The index is named
        ``time(<zone name>)``.
    """
    if tz:
        # "Etc/GMT" zone names carry an inverted sign: "Etc/GMT-1" is one
        # hour ahead of UTC, hence the negation of ``tz``
        tzname = pytz.timezone(f'Etc/GMT{-tz:+d}')
    else:
        tz = 0
        tzname = pytz.timezone('UTC')
    # keep month, day and time of each stamp; only year and zone change
    new_index = pd.DatetimeIndex([
        timestamp.replace(year=year, tzinfo=tzname)
        for timestamp in tmy_data.index],
        name=f'time({tzname})')
    # Rotate row positions instead of the values: handing the dataframe
    # itself to np.roll collapses it into a single array with one common
    # dtype, which turns every column into ``object`` as soon as one column
    # holds strings.
    positions = np.roll(np.arange(len(tmy_data)), tz)
    new_tmy_data = tmy_data.iloc[positions]
    new_tmy_data.index = new_index
    return new_tmy_data
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_pvgis_tmy_coerce_year():
    """Check ``roll_utc_offset`` and ``coerce_year`` of ``get_pvgis_tmy``.

    The default request is used as the reference. Each scenario then checks
    the first and last index entries, the index name, and that one sampled
    row per month still matches the reference once the row shift is
    accounted for.
    """
    latitude, longitude = 45, 8  # Turin
    reference, _months, _inputs, _meta = get_pvgis_tmy(latitude, longitude)
    assert str(reference.index.tz) == 'UTC'
    assert reference.index.name == 'time(UTC)'
    # row at position 12 of every month of the unshifted data
    monthly_samples = [
        reference[reference.index.month == month].iloc[12]
        for month in range(1, 13)]

    cet_offset = 1  # Turin time is CET
    cet_zone = 'Etc/GMT-1'
    coerced_year = 2021
    scenarios = (
        # (extra keyword arguments, zone name, expected year, row shift)
        # time zone converted, year left at the 1990 default
        ({'roll_utc_offset': cet_offset}, cet_zone, 1990, cet_offset),
        # time zone converted and year coerced
        ({'roll_utc_offset': cet_offset, 'coerce_year': coerced_year},
         cet_zone, coerced_year, cet_offset),
        # year coerced while the offset is left unspecified (UTC)
        ({'coerce_year': coerced_year}, 'UTC', coerced_year, 0),
    )
    for extra_kwargs, zone, year, shift in scenarios:
        shifted, _months, _inputs, _meta = get_pvgis_tmy(
            latitude, longitude, **extra_kwargs)
        first_stamp = pd.Timestamp(f'{year}-01-01 00:00:00', tz=zone)
        last_stamp = pd.Timestamp(f'{year}-12-31 23:00:00', tz=zone)
        assert shifted.index[0] == first_stamp
        assert shifted.index[-1] == last_stamp
        assert shifted.index.name == f'time({zone})'
        # spot check that the rolled data matches the reference rows
        for month, sample in enumerate(monthly_samples, start=1):
            in_month = shifted[shifted.index.month == month]
            assert all(sample == in_month.iloc[12 + shift])