Skip to content

Commit

Permalink
progress on #82
Browse files Browse the repository at this point in the history
  • Loading branch information
emmamendelsohn committed Mar 29, 2024
1 parent c17a911 commit 31c224f
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 7 deletions.
29 changes: 29 additions & 0 deletions R/make_mask_lookup.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#' @title Build date and adjacent-shape mask lookup tables
#'
#' @description
#' Builds two lookup tables: one listing, for each selected date, the other
#' selected dates that fall within the following 90 days; and one listing, for
#' each shape, the names of the shapes that touch it.
#'
#' @details
#' The date window is exclusive of the date itself and inclusive of day 90
#' (`0 < difference <= 90`). Adjacency is whatever `st_touches()` reports
#' between the rows of `rsa_polygon`.
#'
#' @param model_dates_selected Vector of dates to build the date lookup from.
#'   Assumed to be of class `Date` so that differences are measured in days --
#'   TODO confirm against the caller.
#' @param rsa_polygon Spatial object accepted by `st_touches()` with one row
#'   per shape and a `shapeName` column.
#' @return A named list with two tibbles:
#'   \describe{
#'     \item{masked_dates_90_days_lookup}{One row per element of
#'       `model_dates_selected`, with columns `date` and `mask` (a list-column
#'       holding the selected dates 1 to 90 days after `date`).}
#'     \item{masked_shapes_adjacent_lookup}{One row per row of `rsa_polygon`,
#'       with columns `shape` and `mask` (a list-column holding the
#'       `shapeName` values of the touching shapes).}
#'   }
#' @author Emma Mendelsohn
#' @export
make_mask_lookup <- function(model_dates_selected, rsa_polygon) {

  # Iterate by index rather than over the vector itself, so each `date` is
  # taken with `[` and keeps the class of `model_dates_selected` no matter how
  # the mapper treats classed vectors.
  masked_dates_90_days_lookup <- map_dfr(seq_along(model_dates_selected), function(i) {
    date <- model_dates_selected[i]
    days_after <- as.numeric(model_dates_selected - date)
    tibble(
      date = date,
      mask = list(model_dates_selected[days_after > 0 & days_after <= 90])
    )
  })

  # The touch relation does not depend on the row being processed, so compute
  # it once for all shapes; element i holds the row indices touching shape i.
  touching <- st_touches(rsa_polygon, rsa_polygon)
  shape_names <- rsa_polygon$shapeName

  # seq_len() (not 1:nrow()) so a zero-row input produces no iterations.
  masked_shapes_adjacent_lookup <- map_dfr(seq_len(nrow(rsa_polygon)), function(i) {
    tibble(
      shape = shape_names[i],
      mask = list(shape_names[touching[[i]]])
    )
  })

  list(
    masked_dates_90_days_lookup = masked_dates_90_days_lookup,
    masked_shapes_adjacent_lookup = masked_shapes_adjacent_lookup
  )

}
10 changes: 6 additions & 4 deletions _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -537,17 +537,19 @@ model_targets <- tar_plan(

# Splitting --------------------------------------------------
# Initial train and test (i.e., holdout)
tar_target(model_data_split, initial_split(model_data, prop = 0.8)), # pick random days left out of the training
tar_target(model_data_split, initial_split(model_data, prop = 0.8)), # pick random days and shapes to be withheld from training
tar_target(training_data, training(model_data_split)),
tar_target(holdout_data, testing(model_data_split)),

# CV splits
# Mask from the training set the three months following the holdout dates for the given district and the surrounding districts.
# Should this be on the whole training set, or just the analysis portion of the training set?
# In other words, it doesn't have to be masked from the model's assessments in the CV routine, we can use that data to assess performance
tar_target(masked_data, get_masks_for_training(holdout_data)),
# In other words, it doesn't have to be masked from the model's assessments in the CV routine, we can still use that data to assess performance
tar_target(mask_lookup, make_mask_lookup(model_dates_selected, rsa_polygon)),
tar_target(holdout_data_masks, holdout_data, mask_lookup), # TODO use mask_lookup against holdout_data to determine which dates/shapes need to be excluded from training
tar_target(training_splits, vfold_cv(training_data)), # subsplit training analysis/assessment
tar_target(training_splits_masked, 1), # subsplit training analysis/assessment
tar_target(training_splits_masked, training_splits, holdout_data_masks), # TODO remove data from holdout_data_masks in analysis splits
# TODO in addition to the above step, we need to get assessment data masks from the splits, and mask these from each assessment split

# Define formula and model
tar_target(model_workflow, build_workflow(training_data)),
Expand Down
8 changes: 5 additions & 3 deletions _targets/meta/meta

Large diffs are not rendered by default.

0 comments on commit 31c224f

Please sign in to comment.