Skip to content

Commit

Permalink
refactor forecast anomaly function to use range of dates issue #64
Browse files Browse the repository at this point in the history
  • Loading branch information
emmamendelsohn committed Nov 3, 2023
1 parent cf99983 commit 48bf2e0
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 34 deletions.
46 changes: 15 additions & 31 deletions R/calculate_forecasts_anomalies.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed,
forecasts_transformed_dataset <- open_dataset(ecmwf_forecasts_transformed_directory)

# Get the forecasts anomalies for selected dates, mapping over the lead intervals
lead_intervals_start <- c(0 , lead_intervals[-length(lead_intervals)])
lead_intervals_end <- lead_intervals
lead_intervals_start <- c(0 , lead_intervals[-length(lead_intervals)]) # 0 to include current day in forecast
lead_intervals_end <- lead_intervals - 1 # -1 for 30 days total include start day

anomalies <- map(1:length(lead_intervals_start), function(i){

Expand All @@ -47,7 +47,7 @@ calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed,
end <- lead_intervals_end[i]

lead_start_date <- date_selected + start
lead_end_date <- date_selected + end
lead_end_date <- date_selected + end

# lead months for subsetting
lead_months <- as.character(c(i, i+1))
Expand All @@ -69,54 +69,38 @@ calculate_forecasts_anomalies <- function(ecmwf_forecasts_transformed,
summarize(lead_mean = sum(mean * weight)/ sum(weight)) |>
ungroup()

# bring in historical means for the relevant days of the year
# lookup with the historical means that are pre generated - this requires averaging over means and SDs
hist_doy <- yday(seq(lead_start_date, lead_end_date-1, by = "day"))
hist_doy_frmt <- str_pad(hist_doy, width = 3, side = "left", pad = "0")
hist_means <- open_dataset(weather_historical_means[str_detect(weather_historical_means, paste(hist_doy_frmt, collapse = "|"))]) |>
select(-day_of_year) |>
group_by(x, y) |>
summarize(across(everything(), mean)) |>
ungroup() |>
collect()
# get historical means for lead period
doy_start <- yday(lead_start_date)
doy_end <- yday(lead_end_date)
doy_start_frmt <- str_pad(doy_start, width = 3, side = "left", pad = "0")
doy_end_frmt <- str_pad(doy_end, width = 3, side = "left", pad = "0")
doy_range <- glue::glue("{doy_start_frmt}_to_{doy_end_frmt}")

historical_means <- read_parquet(weather_historical_means[str_detect(weather_historical_means, doy_range)])
assertthat::assert_that(nrow(historical_means) > 0)

# alternative approach using the actual data - takes 30 sec each run
# tar_load(nasa_weather_transformed)
# hist_doy <- yday(seq(lead_start_date, lead_end_date-1, by = "day"))
# hist_means <- open_dataset(nasa_weather_transformed) |>
# filter(day_of_year %in% hist_doy) |>
# group_by(x, y) |>
# summarize(historical_relative_humidity_mean = mean(relative_humidity),
# historical_temperature_mean = mean(temperature),
# historical_precipitation_mean = mean(precipitation),
# historical_relative_humidity_sd = sd(relative_humidity),
# historical_temperature_sd = sd(temperature),
# historical_precipitation_sd = sd(precipitation)) |>
# ungroup() |>
# collect()

# calculate anomalies - a bit inefficient because arrow doesn't allow reshaping (should have done so in the transform function)
# NAs are expected because forecasts are for the whole continent, weather is just for areas of interest

temp_anomalies <- lead_means |>
filter(short_name == "2t") |>
left_join(hist_means |> select(x, y, contains("temperature")), by = c("x", "y")) |>
left_join(historical_means |> select(x, y, contains("temperature")), by = c("x", "y")) |>
mutate(!!paste0("anomaly_temperature_forecast_", end) := lead_mean - historical_temperature_mean,
!!paste0("anomaly_temperature_scaled_forecast_", end) := (lead_mean - historical_temperature_mean)/historical_temperature_sd) |>
select(-short_name, -lead_mean, -starts_with("historical")) |>
filter(!is.na(!!sym(paste0("anomaly_temperature_forecast_", end))))

rh_anomalies <- lead_means |>
filter(short_name == "rh") |>
left_join(hist_means |> select(x, y, contains("humidity")), by = c("x", "y")) |>
left_join(historical_means |> select(x, y, contains("humidity")), by = c("x", "y")) |>
mutate(!!paste0("anomaly_relative_humidity_forecast_", end) := lead_mean - historical_relative_humidity_mean,
!!paste0("anomaly_relative_humidity_scaled_forecast_", end) := (lead_mean - historical_relative_humidity_mean)/historical_relative_humidity_sd) |>
select(-short_name, -lead_mean, -starts_with("historical")) |>
filter(!is.na(!!sym(paste0("anomaly_relative_humidity_forecast_", end))))

precip_anomalies <- lead_means |>
filter(short_name == "tprate") |>
left_join(hist_means |> select(x, y, contains("precipitation")), by = c("x", "y")) |>
left_join(historical_means |> select(x, y, contains("precipitation")), by = c("x", "y")) |>
mutate(!!paste0("anomaly_precipitation_forecast_", end) := lead_mean - historical_precipitation_mean,
!!paste0("anomaly_precipitation_scaled_forecast_", end) := (lead_mean - historical_precipitation_mean)/historical_precipitation_sd) |>
select(-short_name, -lead_mean, -starts_with("historical")) |>
Expand Down
6 changes: 3 additions & 3 deletions R/calculate_weather_anomalies.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ calculate_weather_anomalies <- function(nasa_weather_transformed,
weather_transformed_dataset <- open_dataset(nasa_weather_transformed_directory)

# Get the lagged anomalies for selected dates, mapping over the lag intervals
lag_intervals_start <- c(1 , 1+lag_intervals[-length(lag_intervals)])
lag_intervals_end <- lag_intervals
lag_intervals_start <- c(1 , 1+lag_intervals[-length(lag_intervals)]) # 1 to start with previous day
lag_intervals_end <- lag_intervals # 30 days total including end day

anomalies <- map2(lag_intervals_start, lag_intervals_end, function(start, end){

# get lag dates
lag_dates <- seq(date_selected - end, date_selected - start, by = "day")

# Get historical means for DOY
# Get historical means for lag period
doy_start <- yday(lag_dates[1])
doy_end <- yday(lag_dates[length(lag_dates)])
doy_start_frmt <- str_pad(doy_start, width = 3, side = "left", pad = "0")
Expand Down

0 comments on commit 48bf2e0

Please sign in to comment.