Select_GCMs_Centroid.Rmd

---
title: "Select GCMs from Centroid Climate Data"
author: "Caitlin Mothes and Katie Willi"
date: "`r Sys.Date()`"
output:
  html_document:
    toc: true
    toc_float: true
    theme: paper
---

```{r setup, include=FALSE}
# Chunk options applied to every chunk in this document
knitr::opts_chunk$set(
  echo = FALSE,
  warning = FALSE,
  error = FALSE,
  message = FALSE
  # cache = TRUE
)

# Run the project setup script
# (presumably loads the packages and helper functions used below - confirm)
source("setup.R")
```

# Workflow to process MACA climate centroid data

**Codebase modified from <https://github.com/nationalparkservice/CCRP_automated_climate_futures>, led by Amber Runyon.**

*The workflow below is designed for exploring one park at a time and, within that park, one centroid file at a time. To run this code for one or more parks, use the `select_GCMs()` function, which is applied to all parks at the end of this .Rmd.*

Function to pull in all park centroid files:

```{r}
#' Read every centroid climate file for a park into the global environment
#'
#' Each file found in `data/park/<park>/centroid/climate` is read with
#' `read_csv()` and assigned to an object in `.GlobalEnv` named after the
#' file (directory and extension removed).
#'
#' @param park Park unit code used as the folder name, e.g. "BRCA".
#' @return Called for its side effects (objects created in `.GlobalEnv`).
get_files <- function(park) {
  climate_dir <- file.path("data", "park", park, "centroid", "climate")
  climate_files <- list.files(climate_dir, full.names = TRUE)

  walk(climate_files, function(x) {
    tmp <- read_csv(x)
    # Build the object name from the file name itself rather than from fixed
    # character positions, so park codes and file extensions of any length
    # produce the right name.
    name <- tools::file_path_sans_ext(basename(x))
    assign(name, tmp, envir = .GlobalEnv)
  })
}
```

## Get data for park:

```{r}
# park unit code to explore; also used in the figure titles further down
park <- "BRCA"

## get all files for the park chosen above (not a second hard-coded code)
get_files(park)

## create list of future and historic dfs
## MAKE SURE no other objects with '_future' or '_historical' in their names,
## because ls() collects every object whose name matches the pattern
future_dfs <- mget(ls(pattern = "_future"))

historic_dfs <- mget(ls(pattern = "_historical"))

```

## Set up parameters

```{r}
# Climate future (CF) categories
CFs_all <- c("Warm Wet", "Hot Wet", "Central", "Warm Dry", "Hot Dry")

# Selection method used to filter the final set of selected GCMs
method <- "pca"

# Percentage of models to drop from ranking
Percent_skill_cutoff <- .1

# Threshold percentages for defining climate futures.
# Default low/high: 0.25, 0.75
CFLow <- 0.25
CFHigh <- 0.75

```

## Clean data

Following methods in draft Climate report from Dave, "Methods for assessing climate change exposure for national park planning"; Runyon et al. 2023.

*Change the index number (`[[i]]`) to change which centroid is explored.*

```{r}
# Cleaning steps shared by the future and historic centroid data:
# rename the climate columns to short names, add `year` (as text) and `DOY`
# (day of year) columns, then keep only the rows whose year is in `years`.
prep_centroid_climate <- function(df, years) {
  renamed <- dplyr::rename(
    df,
    precip_in = `Precip (in)`,
    tmin_f = `Tmin (F)`,
    tmax_f = `Tmax (F)`,
    rhmax = `RHmax (%)`,
    rhmin = `RHmin(%)`,
    tavg_f = `Tavg (F)`
  )

  with_dates <- mutate(
    renamed,
    year = format(Date, "%Y"),
    # VPD = VPD(tmin_f, tmax_f, rhmin, rhmax), # do we need vapor pressure??
    DOY = yday(Date)
  )

  filter(with_dates, year %in% years)
}

# future data, filter to 2035-2065 (2050 mean)
future_all <- prep_centroid_climate(future_dfs[[1]], 2035:2065)

# historic data, filter to 1979-2012 baseline
historic_all <- prep_centroid_climate(historic_dfs[[1]], 1979:2012)
```

## Low skill models

```{r}
# Determine low-skill models using list created from Rupp et al. 2016 (this is from the CCRP team)

# assign region of park (one of SWR, SER, PWR or 'mean' if none of these)
# "SWR" matches the region passed for BRCA in the select_GCMs() test call
# later in this document
region <- "SWR"

# Number of lowest-skill models to drop. The unique GCM labels are halved
# (presumably two emissions scenarios per model - confirm) and scaled by the
# cutoff; floor() guarantees slice_max() receives a whole number.
n_low_skill <- floor(length(unique(future_all$GCM)) / 2 * Percent_skill_cutoff)

low_skill_models <-
  read_delim("https://raw.githubusercontent.com/rossyndicate/CCRP_automated_climate_futures/master/data/general/GCM_skill_by_region.txt") %>%
  # use the park's region when the skill table has it, otherwise the 'mean' rows
  filter(if (region %in% Region) {
    Region == region
  } else {
    Region == "mean"
  }) %>%
  # remove period at end of GCM names (will need later)
  mutate(GCM = str_sub(GCM, 1, -2)) %>%
  # Worse models have higher value rank
  slice_max(n = n_low_skill, order_by = Rank)

```

## Calculate Deltas

Calculate all baseline values, averages, and change from baseline to 2050 average.

```{r}

# baseline means from historic data
# na.rm = TRUE (as for the future means below) so one missing value does not
# turn a baseline, and every delta computed from it, into NA
baseline <- historic_all %>%
  summarise(
    baseline_pr = mean(precip_in, na.rm = TRUE),
    baseline_tmax = mean(tmax_f, na.rm = TRUE),
    baseline_tmin = mean(tmin_f, na.rm = TRUE),
    baseline_tavg = mean(tavg_f, na.rm = TRUE),
    baseline_rhmax = mean(rhmax, na.rm = TRUE),
    baseline_rhmin = mean(rhmin, na.rm = TRUE)
  )


# future means for each GCM
future_means <- future_all %>%
  group_by(GCM) %>%
  # across() replaces the superseded summarise_at()
  summarise(across(precip_in:tavg_f, ~ mean(.x, na.rm = TRUE))) %>%
  # add delta columns using baseline values
  mutate(
    delta_pr = precip_in - baseline$baseline_pr,
    delta_tmax = tmax_f - baseline$baseline_tmax,
    delta_tmin = tmin_f - baseline$baseline_tmin,
    delta_tavg = tavg_f - baseline$baseline_tavg,
    delta_rhmax = rhmax - baseline$baseline_rhmax,
    delta_rhmin = rhmin - baseline$baseline_rhmin
  ) %>%
  # split the GCM label at "." into model name and RCP, keeping the original
  separate_wider_delim(
    GCM,
    names = c("GCM_only", "RCP"),
    delim = ".",
    cols_remove = FALSE
  ) %>%
  # remove low skill models
  filter(!GCM_only %in% low_skill_models$GCM)
  

```

## Assign Climate Future Categories

```{r}
#### Set limits for CF classification
# The low/high quantiles come from the CFLow / CFHigh parameters
# (0.25 / 0.75 by default) instead of being hard-coded here.
Pr0 <- as.numeric(quantile(future_means$delta_pr, 0))
Pr25 <- as.numeric(quantile(future_means$delta_pr, CFLow))
PrAvg <- mean(future_means$delta_pr)
Pr75 <- as.numeric(quantile(future_means$delta_pr, CFHigh))
Pr100 <- as.numeric(quantile(future_means$delta_pr, 1))
Tavg0 <- as.numeric(quantile(future_means$delta_tavg, 0))
Tavg25 <- as.numeric(quantile(future_means$delta_tavg, CFLow))
Tavg <- mean(future_means$delta_tavg)
Tavg75 <- as.numeric(quantile(future_means$delta_tavg, CFHigh))
Tavg100 <- as.numeric(quantile(future_means$delta_tavg, 1))

# CF assignment
future_means <- future_means %>%
  # designate climate future classification based on cf limits;
  # parentheses spell out the existing `&`-before-`|` evaluation order,
  # and models matching no rule get NA
  mutate(
    CF = case_when(
      (delta_tavg < Tavg & delta_pr > Pr75) |
        (delta_tavg < Tavg25 & delta_pr > PrAvg) ~ "Warm Wet",
      (delta_tavg > Tavg & delta_pr > Pr75) |
        (delta_tavg > Tavg75 & delta_pr > PrAvg) ~ "Hot Wet",
      delta_tavg > Tavg25 &
        delta_tavg < Tavg75 &
        delta_pr > Pr25 &
        delta_pr < Pr75 ~ "Central",
      (delta_tavg < Tavg & delta_pr < Pr25) |
        (delta_tavg < Tavg25 & delta_pr < PrAvg) ~ "Warm Dry",
      (delta_tavg > Tavg & delta_pr < Pr25) |
        (delta_tavg > Tavg75 & delta_pr < PrAvg) ~ "Hot Dry"
    )
  )


```

## Corners Method

```{r}
#### Select Corner GCMs, assuming temp on x and precip on y
lx <- min(future_means$delta_tavg)
ux <- max(future_means$delta_tavg)
ly <- min(future_means$delta_pr)
uy <- max(future_means$delta_pr)

# convert to points (x = temperature change, y = precipitation change)
ww <- c(lx, uy)
wd <- c(lx, ly)
hw <- c(ux, uy)
hd <- c(ux, ly)

corners <- future_means %>%
  # calculate euclidean distance of each point/model from each corner
  mutate(
    ww_dist = sqrt((delta_tavg - ww[1])^2 + (delta_pr - ww[2])^2),
    wd_dist = sqrt((delta_tavg - wd[1])^2 + (delta_pr - wd[2])^2),
    hw_dist = sqrt((delta_tavg - hw[1])^2 + (delta_pr - hw[2])^2),
    hd_dist = sqrt((delta_tavg - hd[1])^2 + (delta_pr - hd[2])^2)
  )

# Model closest to each corner among the models already classified into that
# corner's CF. Each result is zero-length when no model has that CF.
ww_model <- corners %>% filter(CF == "Warm Wet") %>% slice(which.min(ww_dist)) %>% .$GCM
wd_model <- corners %>% filter(CF == "Warm Dry") %>% slice(which.min(wd_dist)) %>% .$GCM
hw_model <- corners %>% filter(CF == "Hot Wet") %>% slice(which.min(hw_dist)) %>% .$GCM
hd_model <- corners %>% filter(CF == "Hot Dry") %>% slice(which.min(hd_dist)) %>% .$GCM

# assign CF to each selected corner model
# %in% (not ==) so an empty CF yields all-FALSE instead of a zero-length
# condition that case_when() cannot recycle
future_means <- future_means %>%
  mutate(
    corners = case_when(
      GCM %in% ww_model ~ "Warm Wet",
      GCM %in% wd_model ~ "Warm Dry",
      GCM %in% hw_model ~ "Hot Wet",
      GCM %in% hd_model ~ "Hot Dry"
    )
  )

```

## PCA Method

Using just change in precip and avg temp (Runyon et al. methods), may want to explore other variables for more in-depth analysis at the park level.

```{r}
# set up for PCA
future_pca_1 <- future_means %>%
  dplyr::select(GCM, delta_pr, delta_tavg) %>%
  # set up for prcomp: GCM names become row names so only numeric columns remain
  column_to_rownames(var = "GCM")


pca_1 <- prcomp(future_pca_1, center = TRUE, scale. = TRUE)

# quick plot
autoplot(pca_1, data = future_pca_1, loadings = TRUE, label = TRUE)

# get dataframe of PC scores, with the GCM names moved from row names into a
# column before any filtering
pca_1_df <- as.data.frame(pca_1$x)
pca_scores <- rownames_to_column(pca_1_df, var = "GCM")


# Take the min/max of each of the PCs.
# `PC` records which component a model is an extreme of; ID.redundant.gcm()
# reads this column via PCA$PC. A model that is an extreme on both components
# appears once per component, which is what the duplicated(PCA$GCM) check in
# the next chunk looks for.
PCs <- bind_rows(
  pca_scores %>%
    filter(PC1 == min(PC1) | PC1 == max(PC1)) %>%
    mutate(PC = "PC1"),
  pca_scores %>%
    filter(PC2 == min(PC2) | PC2 == max(PC2)) %>%
    mutate(PC = "PC2")
)


# Assigns CFs to diagonals
# diagonal1 = Warm Wet / Hot Dry, diagonal2 = Warm Dry / Hot Wet
diagonals <-
  rbind(
    data.frame(CF = CFs_all[c(1, 5)], diagonals = "diagonal1"),
    data.frame(CF = CFs_all[c(4, 2)], diagonals = "diagonal2")
  )


PCA <- future_means %>%
  filter(GCM %in% PCs$GCM) %>%
  left_join(diagonals, by = "CF") %>%
  right_join(PCs, by = "GCM")

# create column with selected pca models
# typed NA so if_else() gets matching character types in both branches
future_means <- future_means %>%
  mutate(pca = if_else(GCM %in% PCs$GCM,
                       CF,
                       NA_character_))


```

Handling missing and/or extra quadrants:

```{r}

# function to deal with redundant quadrant
#
# Finds the diagonal that occurs exactly once in `PCA$diagonals`, looks up the
# PC of the model on that diagonal, and returns the other GCM(s) listed for
# that same PC.
#
# PCA: data frame with columns `GCM` and `diagonals`, plus a `PC` column
#      naming the principal component each row is an extreme of.
# Returns a character vector of GCM names; zero-length when no diagonal occurs
# exactly once or when `PCA` has no `PC` column.
ID.redundant.gcm <- function(PCA) {
  # ID redundant diagonal: the one occurring exactly once.
  # table() is base R and ignores NA (rows without a diagonal).
  diag_counts <- table(PCA$diagonals)
  redundant_diag <- names(diag_counts)[diag_counts == 1]
  on_redundant_diag <- PCA$diagonals %in% redundant_diag

  # ID which PC has the redundant diagonal
  # ([[ ]] gives NULL, without a warning, if the column is absent)
  pc_foul <- PCA[["PC"]][on_redundant_diag]

  # ID GCM that shares that PC but is not the model on the redundant diagonal.
  # %in% keeps the comparison well-defined for zero or several matches.
  PCA$GCM[PCA[["PC"]] %in% pc_foul &
            !(PCA$GCM %in% PCA$GCM[on_redundant_diag])]
}

 #if a quadrant is missing
# corner CFs (everything except "Central") with no PCA-selected model
missing_cfs <- setdiff(CFs_all[CFs_all != "Central"], future_means$pca)

if (length(missing_cfs) > 0) {
  # assign corners selection to each missing CF; %in% handles one or several
  # missing quadrants, where `==` against the setdiff() result would recycle
  fill_rows <- future_means$corners %in% missing_cfs
  future_means$pca[fill_rows] <- future_means$corners[fill_rows]

  # Only remove the GCM in the redundant diagonal when no GCM appears twice in
  # PCA; if one does, removing a model would leave an empty quadrant.
  if (!any(duplicated(PCA$GCM))) {
    future_means$pca[future_means$GCM %in% ID.redundant.gcm(PCA)] <- NA
  }
}


```

## Return Selected Models

```{r}
# return selected models based on the method identified in parameters
# `method` holds the name of a selection column in future_means ("pca" or
# "corners"); all_of() makes that explicit instead of relying on the
# deprecated lookup of an external variable inside a selection
selected_gcms <- future_means %>%
  drop_na(all_of(method)) %>%
  dplyr::select(GCM, CF)
```

### Figures

From <https://github.com/nationalparkservice/CCRP_automated_climate_futures/blob/master/scripts/Scatter_and_diagnostic.R>

```{r}
library(ggrepel)

# axis label fragments; `x` and `y` are not used by the plot below
Longx <- "annual average temperature (F)"
Longy <- "annual average precipitation (in)"
x <- "DeltaTavg"
y <- "DeltaPr"


# No color
# Base plot: temperature change on x, precipitation change (times 365) on y.
# The xmin/xmax/ymin/ymax aesthetics describe the box drawn by geom_rect().
dualscatter <- ggplot(
  future_means,
  aes(
    delta_tavg,
    delta_pr * 365,
    xmin = Tavg25,
    xmax = Tavg75,
    ymin = Pr25 * 365,
    ymax = Pr75 * 365
  )
)

dualscatter +
  geom_text_repel(aes(label = GCM)) +
  geom_point(colour = "black", size = 4) +
  theme(
    axis.text = element_text(size = 16),
    axis.title.x = element_text(size = 16, vjust = -0.2),
    axis.title.y = element_text(size = 16, vjust = 0.2),
    plot.title = element_text(size = 20, face = "bold", vjust = 2, hjust = 0.5),
    legend.text = element_text(size = 16),
    legend.title = element_text(size = 16)
  ) +
  ###
  labs(
    title = paste0(park, " Changes in climate means in ", 2050, " by GCM run"),
    x = paste0("Changes in ", Longx), # Change
    y = paste0("Changes in ", Longy) # change
  ) +
  scale_color_manual(name = "Scenarios", values = c("black")) +
  # scale_fill_manual(name="Scenarios",values = c("black")) +
  theme(legend.position = "none") +
  geom_rect(color = "black", alpha = 0) +
  # dashed lines at the mean change on each axis
  geom_hline(aes(yintercept = mean(delta_pr * 365)), linetype = 2) + # change
  geom_vline(aes(xintercept = mean(delta_tavg)), linetype = 2)
```

Presentation Figure:

```{r}
### Scatter plot showing delta precip and tavg, colored by climate future,
### shaped by emissions scenario (RCP), with box for central CF

# Colours keyed by CF name. Unnamed values are matched to the levels that are
# present by position, so a CF with no models would shift every colour.
cf_colours <- c(
  "Central" = "gray",
  "Hot Dry" = "darkred",
  "Hot Wet" = "darkblue",
  "Warm Dry" = "pink",
  "Warm Wet" = "lightblue"
)

ggplot(
  future_means,
  aes(
    delta_tavg,
    365 * delta_pr,
    xmin = Tavg25,
    xmax = Tavg75,
    ymin = 365 * Pr25,
    ymax = 365 * Pr75
  )
) + geom_point(aes(shape = RCP, color = CF), size = 4) +
  #PlotTheme +
  labs(
    title = paste(park, "- Changes in climate means in", 2050 , "by GCM run"),
    x = "Change in annual average temperature (F)",
    y = "Change in average annual precipitation (in)"
  ) +
  scale_colour_manual(values = cf_colours) +
  guides(color = guide_legend(title = "Climate Future")) +
  
  #geom_point(colour="black",size=4) +
  theme(
    axis.text = element_text(size = 16),
    axis.title.x = element_text(size = 16, vjust = -0.2),
    axis.title.y = element_text(size = 16, vjust = 0.2),
    plot.title = element_text(
      size = 20,
      face = "bold",
      vjust = 2,
      hjust = 0.5
    ),
    legend.text = element_text(size = 16),
    legend.title = element_text(size = 16)
  ) +
  # box from the xmin/xmax/ymin/ymax aesthetics set in ggplot()
  geom_rect(color = "black", alpha = 0) +
  # dashed lines at the mean change on each axis
  geom_hline(aes(yintercept = 365 * mean(delta_pr)), linetype = 2) +
  geom_vline(aes(xintercept = mean(delta_tavg)), linetype = 2) +
  # highlight high/low runoff models
  geom_point(
    data = filter(
      future_means,
      GCM %in% c("CSIRO-Mk3-6-0.rcp45", "MIROC-ESM-CHEM.rcp85")
    ),
    aes(delta_tavg, 365 * delta_pr),
    shape = 1,
    color = "yellow",
    size = 6,
    stroke = 3
  ) +
  # dashed circle around each model in selected_gcms
  geom_mark_circle(data = filter(future_means, GCM %in% selected_gcms$GCM),
                    aes(group = GCM), expand = 0.025, size = 1, linetype = 2) + 
  geom_text_repel(aes(label = GCM))
   

```

## Test Function

```{r}
# Run select_GCMs() for BRCA with the same year ranges, skill cutoff and
# method explored step by step above, without saving
brca_gcms <- select_GCMs(
  "BRCA",
  region = "SWR",
  future_range = 2035:2065,
  historic_range = 1979:2012,
  low_skill_cutoff = 0.1,
  method = "pca",
  save = FALSE
)
```

Inspect these results:

```{r}
# Temperature vs precipitation change for the returned models, colored by CF
# and labelled with centroid and GCM
ggplot(brca_gcms, aes(x = delta_tavg, y = delta_pr)) +
  geom_point(aes(color = CF), size = 4) +
  geom_text_repel(aes(label = paste(centroid, GCM)))
```

# Park wide GCM selection

Read in and clean the list of parks and region codes (need to filter to just CONUS for centroids).

```{r}

# download a list of all park names and attributes
parks <- getParkSystem()

# state/territory codes to exclude (non CONUS)
non_conus_states <- c("AK", "AS", "HI", "MP", "PR", "VI")

# drop the geometry, remove non-CONUS units, keep one row per state / unit code
parks_filtered <- parks %>%
  st_drop_geometry() %>%
  filter(!STATE %in% non_conus_states) %>%
  distinct(STATE, UNIT_CODE)


# unit list with region codes, restricted to the units kept above
nps_list <- "data/NPS-Unit-List.xlsx" %>%
  readxl::read_xlsx() %>%
  # remove first row metadata
  slice(-1) %>%
  janitor::clean_names() %>%
  dplyr::select(UNIT_CODE = park_code, region) %>%
  distinct() %>%
  inner_join(parks_filtered, by = "UNIT_CODE")
```

## Select GCMs for all parks

```{r}
# small subset to test
#nps_list_test <- nps_list %>% slice(33:34)

# first remove any existing "_future" and "_historical" objects from environment
rm(list = ls(pattern = "_future|_historical"))


# initiate error handling: safely() returns a list with `result` and `error`
# instead of stopping on the first park that fails
saferun <- safely(.f = select_GCMs)

# parkwide_gcms <- map2(nps_list$UNIT_CODE,
#                          nps_list$region,
#                          ~ saferun(park = .x, region = .y))

# rerun only for 13 WBM GCMs
# parks_filtered is distinct on STATE + UNIT_CODE, so a unit code can occur
# more than once; unique() runs each park a single time
parkwide_gcms <- map(unique(parks_filtered$UNIT_CODE),
                     ~ saferun(park = .x))


```

```{r}
# remove errors / 7 parks threw errors
# keep each run's `result`, drop the empty ones, and stack them into one table
parkwide_gcm_clean <- bind_rows(compact(map(parkwide_gcms, "result")))

#write_csv(parkwide_gcm_clean, "data/parkwide_GCMs.csv")

# save new file using filtered WBM GCMs
write_csv(parkwide_gcm_clean, "data/parkwide_GCMs_WBM_filtered.csv")
```