Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Script to fetch OpenMeteo Data(NWP Forecast and Historical data) #93

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions OpenMeteo/OpenMeteo_xr
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @jacobbieker,
I'm encountering an issue while creating an xarray dataset from the OpenMeteo data because of dimension problems. I can successfully fetch data for multiple coordinates, but building the dataset still fails on the dimensions, even though the lengths of the dims are the same.
image

I'm planning to add an argument for NWP (Numerical Weather Prediction) if we need to specify a particular NWP in the function. What do you think about this approach?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, this is a bit hard to debug from this, but if you add to each data point the coord latitude and longitude, that might then work to reshape into a grid?

For adding an argument to specify the NWP, that is perfect! We want to be able to access all the NWPs from OpenMeteo from this, so that would be ideal.

Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import numpy as np
import xarray as xr
from typing import Tuple, List

class WeatherDataFetcher:
    """Fetch hourly Open-Meteo data on a regular latitude/longitude grid.

    The grid is flattened into (latitude, longitude) point pairs, requested
    from the API in chunks, and reassembled into an ``xarray.Dataset`` with
    dimensions ``("latitude", "longitude", "date")``.
    """

    # Endpoint used when the caller does not choose one. Any other Open-Meteo
    # endpoint (a specific NWP model, the archive API, ...) can be supplied
    # through the ``url`` argument of ``fetch_world_grid_data``.
    DEFAULT_URL = "https://api.open-meteo.com/v1/forecast"

    def __init__(self):
        """Set up the Open-Meteo API client with a request cache and retries."""
        cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
        retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
        self.openmeteo = openmeteo_requests.Client(session=retry_session)

    @staticmethod
    def _grid_axis(bounds: Tuple[float, float], step: float) -> np.ndarray:
        """Return evenly spaced values from ``bounds[0]`` up to at most ``bounds[1]``."""
        low, high = bounds
        if step <= 0:
            raise ValueError("grid step must be positive")
        if high < low:
            raise ValueError("grid range must be given as (min, max)")
        # Count the points explicitly instead of using
        # np.arange(low, high + step, step): with a float step that form can
        # emit one extra value beyond the requested upper bound.
        count = int(np.floor((high - low) / step + 1e-9)) + 1
        return low + step * np.arange(count)

    @staticmethod
    def _hourly_time_index(hourly) -> pd.DatetimeIndex:
        """Build the timestamps covered by one hourly response block.

        The index is created in UTC and then made timezone-naive, so the
        resulting values are UTC wall-clock times.
        """
        index = pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left",
        )
        return index.tz_localize(None)

    def generate_lat_lon_grid(
        self,
        lat_range: Tuple[float, float] = (-90, 90),
        lon_range: Tuple[float, float] = (-180, 180),
        lat_step: float = 0.25,
        lon_step: float = 0.25,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Return the latitude and longitude axes of a regular grid.

        Args:
            lat_range: ``(min, max)`` latitude bounds.
            lon_range: ``(min, max)`` longitude bounds.
            lat_step: Spacing between consecutive latitudes; must be positive.
            lon_step: Spacing between consecutive longitudes; must be positive.

        Returns:
            ``(latitudes, longitudes)`` as 1-D arrays. Each axis starts at its
            lower bound and never exceeds its upper bound; the upper bound is
            included when the step divides the range evenly.

        Raises:
            ValueError: If a step is not positive or a range is reversed.
        """
        latitudes = self._grid_axis(lat_range, lat_step)
        longitudes = self._grid_axis(lon_range, lon_step)
        return latitudes, longitudes

    def fetch_world_grid_data(
        self,
        start_date: str,
        end_date: str,
        weather_variables: List[str],
        url: str = DEFAULT_URL,
        lat_range: Tuple[float, float] = (-90, 90),
        lon_range: Tuple[float, float] = (-180, 180),
        lat_step: float = 0.25,
        lon_step: float = 0.25,
        chunk_size: int = 200,
    ) -> "xr.Dataset":
        """Fetch hourly variables for every grid point and return them as a Dataset.

        Args:
            start_date: First day to request, passed to the API as ``start_date``.
            end_date: Last day to request, passed to the API as ``end_date``.
            weather_variables: Hourly variable names to request; each becomes
                one data variable of the result.
            url: API endpoint to query (defaults to ``DEFAULT_URL``).
            lat_range: ``(min, max)`` latitude bounds of the grid.
            lon_range: ``(min, max)`` longitude bounds of the grid.
            lat_step: Latitude spacing of the grid.
            lon_step: Longitude spacing of the grid.
            chunk_size: Number of grid points sent per API request.

        Returns:
            A Dataset with dimensions ``("latitude", "longitude", "date")``.
            The coordinates are the *requested* grid values, so the result is
            always a regular grid. ``date`` holds timezone-naive UTC times
            taken from the first response.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1, the grid
                arguments are invalid, or a response's series length does not
                match the first one.
            RuntimeError: If the API returns a different number of locations
                than were requested in a chunk.

        Note:
            Errors raised by the API client are propagated rather than
            swallowed, so a failed request cannot produce a silently
            incomplete grid. The default grid is very large (721 x 1441
            points); pass narrower ranges or coarser steps for practical use.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be a positive integer")

        variables = list(weather_variables)
        latitudes, longitudes = self.generate_lat_lon_grid(
            lat_range, lon_range, lat_step, lon_step
        )

        # The API takes paired latitude/longitude lists of equal length, so
        # expand the two axes into one (lat, lon) pair per grid cell.
        # Row-major order lets the flat result be reshaped to
        # (latitude, longitude, date) at the end.
        lat_mesh, lon_mesh = np.meshgrid(latitudes, longitudes, indexing="ij")
        flat_lat = lat_mesh.ravel()
        flat_lon = lon_mesh.ravel()
        n_points = flat_lat.size

        times = None
        values = {}
        for start in range(0, n_points, chunk_size):
            stop = min(start + chunk_size, n_points)
            params = {
                "latitude": flat_lat[start:stop].tolist(),
                "longitude": flat_lon[start:stop].tolist(),
                "hourly": variables,
                "start_date": start_date,
                "end_date": end_date,
            }
            responses = self.openmeteo.weather_api(url, params=params)
            if len(responses) != stop - start:
                raise RuntimeError(
                    f"Expected {stop - start} locations in the response, "
                    f"got {len(responses)}"
                )

            for point, response in enumerate(responses, start=start):
                hourly = response.Hourly()
                if times is None:
                    times = self._hourly_time_index(hourly)
                for index, var in enumerate(variables):
                    series = np.asarray(hourly.Variables(index).ValuesAsNumpy())
                    if var not in values:
                        # Allocate once per variable; every row is written
                        # before the method returns, so no fill value is needed.
                        values[var] = np.empty(
                            (n_points, series.size), dtype=series.dtype
                        )
                    values[var][point] = series

        shape = (latitudes.size, longitudes.size, len(times))
        return xr.Dataset(
            {
                var: (["latitude", "longitude", "date"], data.reshape(shape))
                for var, data in values.items()
            },
            coords={"latitude": latitudes, "longitude": longitudes, "date": times},
        )

# Example usage: request ten days of hourly data on the default world grid.
weather_variables = ["temperature_2m", "precipitation", "visibility", "cloud_cover"]
start_date, end_date = "2024-01-01", "2024-01-10"
fetcher = WeatherDataFetcher()
world_grid_data = fetcher.fetch_world_grid_data(
    start_date=start_date,
    end_date=end_date,
    weather_variables=weather_variables,
)
print(world_grid_data)
114 changes: 114 additions & 0 deletions OpenMeteo/fetch_OpenMeteo_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import openmeteo_requests # Importing required libraries
import requests_cache
import pandas as pd
from retry_requests import retry

class WeatherDataFetcher:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this be changed to the following, so that the name is more descriptive of the data it is getting?

Suggested change
class WeatherDataFetcher:
class OpenMeteoWeatherDataFetcher:

BASE_URL = "https://api.open-meteo.com/v1/" # Base URL for OpenMeteo API

def __init__(self):
    """Create the Open-Meteo API client used by every fetch method.

    Requests go through a cached session (cache name '.cache', entries
    expiring after 3600) wrapped in a retry helper (5 retries, backoff
    factor 0.2).
    """
    self.openmeteo = openmeteo_requests.Client(
        session=retry(
            requests_cache.CachedSession('.cache', expire_after=3600),
            retries=5,
            backoff_factor=0.2,
        )
    )

def fetch_forecast_data(self, NWP, params):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def fetch_forecast_data(self, NWP, params):
def fetch_forecast_data(self, nwp, params):

We don't want to hard code the NWP that we are using. Ideally, we also want type hints for the inputs and outputs.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Understood, I'll add type hints as per your suggestion.

# Fetch weather data from OpenMeteo API for the specified model (NWP) and parameters
url = f"https://api.open-meteo.com/v1/{NWP}" # Construct API URL
try:
responses = self.openmeteo.weather_api(url, params=params) # Get weather data
return responses[0] # Return the first response (assuming only one location)
except openmeteo_requests.OpenMeteoRequestsError as e:
# Handle OpenMeteoRequestsError exceptions
if 'No data is available for this location' in str(e):
print(f"Error: No data available for the location for model '{NWP}'.")
else:
print(f"Error: {e}")
return None

def fetch_historical_data(self, params):
    """Fetch historical weather data from the Open-Meteo archive API.

    Args:
        params: Query parameters forwarded unchanged to the client's
            ``weather_api`` call.

    Returns:
        The first response returned by the client, or ``None`` when the
        client returns no responses or the request fails.
    """
    # Named differently from the class-level BASE_URL, which points at the
    # forecast API, so the two endpoints cannot be confused.
    archive_url = "https://archive-api.open-meteo.com/v1/archive"
    try:
        responses = self.openmeteo.weather_api(archive_url, params=params)
    except ValueError as e:
        print(f"Error: {e}")
        return None
    except openmeteo_requests.OpenMeteoRequestsError as e:
        # Handle API errors the same way fetch_forecast_data does instead of
        # letting them escape from this method only.
        print(f"Error: {e}")
        return None
    return responses[0] if responses else None

def process_hourly_data(self, response):
# Process hourly data from OpenMeteo API response
# Extract hourly data from the response
hourly = response.Hourly()

# Extract variables
hourly_variables = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, the variables that are extracted are not hardcoded, but can be passed in as arguments.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I will do that, but could you please provide guidance on which variables should be included in the Xarray Dataset?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, this reply slipped through. By default I would go with all available variables, and make one of the arguments a list of parameter names. I think there should be a way to get all the available parameters for a model from the API or something?

"temperature_2m": hourly.Variables(0).ValuesAsNumpy(),
"relative_humidity_2m": hourly.Variables(1).ValuesAsNumpy(),
"precipitation": hourly.Variables(2).ValuesAsNumpy(),
"cloud_cover": hourly.Variables(3).ValuesAsNumpy()
}

# Extract time information
time_range = pd.date_range(
start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
freq=pd.Timedelta(seconds=hourly.Interval()),
inclusive="left"
)

# Create a dictionary for hourly data
hourly_data = {"date": time_range}

# Assign each variable to the corresponding key in the dictionary
for variable_name, variable_values in hourly_variables.items():
hourly_data[variable_name] = variable_values

# Create a DataFrame from the dictionary
hourly_dataframe = pd.DataFrame(data=hourly_data)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this, we want to have the data be returned in an Xarray Dataset, that has coordinates of latitude, longitude, and time_utc, and then the variables and dataarrays in the Dataset.

return hourly_dataframe

def print_location_info(self, response):
    """Print the location metadata carried by an Open-Meteo API response.

    Writes four lines to stdout: coordinates, elevation, timezone (name and
    abbreviation) and the UTC offset in seconds. Returns nothing.
    """
    latitude, longitude = response.Latitude(), response.Longitude()
    print(f"Coordinates {latitude}°N {longitude}°E")

    elevation = response.Elevation()
    print(f"Elevation {elevation} m asl")

    tz_name, tz_abbreviation = response.Timezone(), response.TimezoneAbbreviation()
    print(f"Timezone {tz_name} {tz_abbreviation}")

    utc_offset = response.UtcOffsetSeconds()
    print(f"Timezone difference to GMT+0 {utc_offset} s")


def main():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be great in the tests folder, as a pytest test! So then we can automatically run this on all code changes.

# Main function to demonstrate usage of WeatherDataFetcher class
fetcher = WeatherDataFetcher() # Create instance of WeatherDataFetcher

# Specify parameters for weather data fetch
NWP = "gfs" # Choose NWP model

# NWP models = ["dwd-icon", "gfs", "ecmwf", "meteofrance", "jma", "metno", "gem", "bom", "cma"]

params = {
"latitude": 40.77, # Latitude of the location
"longitude": -73.91, # Longitude of the location
"hourly": ["temperature_2m", "relative_humidity_2m", "precipitation", "cloud_cover"], # Variables to fetch
"start_date": "2023-12-21", # Start date for data
"end_date": "2024-03-15" # End date for data
}

# Fetch weather data for specified model and parameters
response = fetcher.fetch_forecast_data(NWP, params)

# Print location information
fetcher.print_location_info(response)

# Process and print hourly data
gfs_dataframe = fetcher.process_hourly_data(response)
print(gfs_dataframe)

# Fetch historical weather data
history = fetcher.fetch_historical_data(params)
history_dataframe = fetcher.process_hourly_data(history)
print(history_dataframe)


if __name__ == "__main__":
    # Run the demonstration only when this file is executed as a script,
    # not when it is imported as a module.
    main()