Skip to content

Commit

Permalink
Merge pull request #19 from dblodgett-usgs/main
Browse files Browse the repository at this point in the history
first cut of #18 link dams to nhdplusv2 flowlines
  • Loading branch information
dblodgett-usgs authored Apr 23, 2024
2 parents 4b5831d + f9c28e3 commit ef318b5
Show file tree
Hide file tree
Showing 4 changed files with 226 additions and 109 deletions.
259 changes: 177 additions & 82 deletions R/dam_locations.R
Original file line number Diff line number Diff line change
Expand Up @@ -158,117 +158,212 @@ get_dam_locations <- function(dams, nid) {
feature_data_source)
}

# TODO: work out precise hydrologic locations.
get_hydrologic_locations <- function(dams, hydrologic_locations, nhdpv2_fline,
da_diff_thresh = 0.5, search_radius_m = 500,
max_matches_in_radius = 5) {
utility_link_dams <- function(to_link, dams, nhdpv2_fline, vaa, search_radius_m, max_matches_in_radius) {
new_hl <- nhdplusTools::get_flowline_index(nhdpv2_fline,
to_link,
search_radius = units::set_units(
search_radius_m, "m"),
max_matches = max_matches_in_radius)

v2_area <- select(nhdplusTools::get_vaa(),
nhdpv2_COMID = comid,
nhdpv2_totdasqkm = totdasqkm)
sf::st_drop_geometry(select(to_link, provider_id)) %>%
mutate(id = seq_len(nrow(.))) %>%
left_join(new_hl, by = "id") %>%
left_join(select(sf::st_drop_geometry(dams),
provider_id, drainage_area_sqkm),
by = "provider_id") %>%
left_join(select(sf::st_drop_geometry(nhdpv2_fline),
COMID, drainage_area_sqkm_nhdpv2 = TotDASqKM),
by = "COMID") %>%
left_join(select(vaa, COMID = comid, hydroseq), by = "COMID") %>%
mutate(da_diff = abs(drainage_area_sqkm - drainage_area_sqkm_nhdpv2) / drainage_area_sqkm)
}

get_dam_network_location <- function(dams, update_index, nhdpv2_fline, vaa,
search_radius_m, max_matches_in_radius, index_type) {
linked_dams_base <- select(dams[update_index, ], provider_id, nhdpv2_COMID)

all_gages$nhdpv2_REACHCODE <- NA
all_gages$nhdpv2_REACH_measure <- NA
all_gages$nhdpv2_COMID <- NA
linked_dams <- linked_dams_base %>%
left_join(select(sf::st_drop_geometry(nhdpv2_fline), COMID, REACHCODE, FromMeas),
by = c("nhdpv2_COMID" = "COMID"))

for(hl in hydrologic_locations) {

hl$locations <- hl$locations[hl$locations$provider_id %in% all_gages$provider_id, ]

provider_selector <- all_gages$provider %in% hl$provider

matcher <- match(hl$locations$provider_id,
all_gages$provider_id[provider_selector]
)

all_gages$nhdpv2_REACHCODE[provider_selector][matcher] <-
hl$locations$nhdpv2_REACHCODE
all_gages$nhdpv2_REACH_measure[provider_selector][matcher] <-
hl$locations$nhdpv2_REACH_measure
all_gages$nhdpv2_COMID[provider_selector][matcher] <-
hl$locations$nhdpv2_COMID

# Some gages missing reachcode/measure but have COMID
update_index <- is.na(all_gages$nhdpv2_REACH_measure & !is.na(all_gages$nhdpv2_COMID))

if(any(update_index)) {
linked_gages <- select(all_gages[update_index, ], provider_id, nhdpv2_COMID) %>%
left_join(select(sf::st_drop_geometry(nhdpv2_fline), COMID, FromMeas),
by = c("nhdpv2_COMID" = "COMID"))

all_gages$nhdpv2_REACH_measure[update_index] <-
linked_gages$FromMeas
}
# get precise network location

fline_sites <- dplyr::filter(nhdpv2_fline, COMID %in% linked_dams$nhdpv2_COMID) |>
sf::st_transform(sf::st_crs(linked_dams)) |>
sf::st_cast("LINESTRING")

linked_dams <- utility_link_dams(linked_dams, dams, fline_sites, vaa,
search_radius_m, max_matches_in_radius)

linked_dams_dedup <- linked_dams %>%
left_join(select(sf::st_drop_geometry(dams), provider_id, nhdpv2_COMID), by = "provider_id") %>%
group_by(provider_id) %>%
filter(COMID == nhdpv2_COMID) %>%
ungroup()

if(!any(duplicated(linked_dams_dedup$provider_id))) {
linked_dams_base <- linked_dams_base %>%
left_join(select(linked_dams_dedup, provider_id, REACHCODE, REACH_meas, drainage_area_sqkm_nhdpv2),
by = "provider_id")
} else {
stop()
}

all_gages <- left_join(all_gages, v2_area, by = "nhdpv2_COMID")
if(!nrow(linked_dams_base) == sum(update_index)) stop()

diff_da <- abs(all_gages$nhdpv2_totdasqkm -
all_gages$drainage_area_sqkm) /
all_gages$drainage_area_sqkm
dams$nhdpv2_REACHCODE[update_index] <-
linked_dams_base$REACHCODE
dams$nhdpv2_REACH_measure[update_index] <-
linked_dams_base$REACH_meas

bad_da <- all_gages[!is.na(diff_da) & diff_da > da_diff_thresh, ]
dams$index_type[update_index] <- index_type

update_index <- which(is.na(all_gages$nhdpv2_COMID) |
all_gages$provider_id %in% bad_da$provider_id)
dams
}

#' get dam hydrolocations
#' @param dams sf data.frame table of dams with best estimate of comid and drainage area
#' @param nhdpv2_fline sf data.frame of flowlines to index to
#' @param da_diff_thresh numeric between 0 an 1
#' if the normalized difference between prior estimate
#' drainage area and the NHDPlusV2 modeled drainage area is greater than this,
#' the dam is not indexed to the network.
#' @param search_radius_m numeric distance to search from dam location to flowline
#' @param max_matches_in_radius maximum flowines to consider within search radius
#' @description
#' Given input with estimates of NHDPlusV2 comid and prior estimates of drainage area,
#' this function attempts to figure out which dams should be indexed to the network
#' vs which should be left associated to a comid because they drain a very small area
#' vs which should be left un indexed because uncertainty is too high.
#'
get_dam_hydrolocations <- function(dams, nhdpv2_fline, vaa,
da_diff_thresh = 0.1,
search_radius_m = 500,
max_matches_in_radius = 5) {

dams$nhdpv2_COMID <- as.integer(gsub("https://geoconnex.us/nhdplusv2/comid/", "",
dams$nhdpv2_COMID))

dams$nhdpv2_REACHCODE <- NA
dams$nhdpv2_REACH_measure <- NA

dams$drainage_area_sqkm <- dams$drainage_area_sqkm_nawqa
dams$drainage_area_sqkm[is.na(dams$drainage_area_sqkm)] <-
dams$drainage_area_sqkm_nid[is.na(dams$drainage_area_sqkm)]

dams <- left_join(dams, select(sf::st_drop_geometry(nhdpv2_fline),
nhdpv2_COMID = COMID, drainage_area_sqkm_nhdpv2 = TotDASqKM),
by = "nhdpv2_COMID")

dams$drainage_area_sqkm[dams$drainage_area_sqkm == 0] <- NA

# normalized difference in drainage area using NiD/NAWQA best estimate to normalize
# we can use this to evaluate whether what the COMID models is reasonable compared
# to what the NID and NAWQA has as an estimate.
dams$norm_diffda <- (dams$drainage_area_sqkm - dams$drainage_area_sqkm_nhdpv2) /
dams$drainage_area_sqkm

# these are where we have a reasonable network-drainage area match
update_index <- is.na(dams$nhdpv2_REACH_measure) & !is.na(dams$nhdpv2_COMID) &
(!is.na(dams$norm_diffda) & abs(dams$norm_diffda) < da_diff_thresh)

dams$index_type <- rep(NA_character_, nrow(dams))

if(any(update_index)) {

dams <- get_dam_network_location(dams, update_index, nhdpv2_fline, vaa,
search_radius_m, max_matches_in_radius,
"nawqa_on_network_da_match")
}

no_location <- all_gages[update_index, ]
# these are where we have a small catchment worth indexing to despite a da mismatch
update_index <- is.na(dams$nhdpv2_REACH_measure) & # no reachcode measure yet
!is.na(dams$nhdpv2_COMID) & # has a comid
(!is.na(dams$norm_diffda) & # has a normalized area diff
abs(dams$norm_diffda) > da_diff_thresh & # diff is larger than thresh
dams$drainage_area_sqkm_nhdpv2 < 10) # but this is near a headwater

no_location <- st_transform(no_location, 5070)
if(any(update_index)) {

dams <- get_dam_network_location(dams, update_index, nhdpv2_fline, vaa,
search_radius_m, max_matches_in_radius,
"nawqa_<10sqkm_da_mismatch")
}

new_hl <- nhdplusTools::get_flowline_index(nhdpv2_fline,
no_location,
search_radius = units::set_units(
search_radius_m, "m"),
max_matches = max_matches_in_radius)
# these are where we don't have a network drainage area match but NAWQA assigned a COMID
update_index <- is.na(dams$nhdpv2_REACH_measure) & !is.na(dams$nhdpv2_COMID) &
(!is.na(dams$norm_diffda) & abs(dams$norm_diffda) > da_diff_thresh)

if(any(update_index)) {
dams$index_type[update_index] <- "nawqa_off_network_da_mismatch"
}

linked_gages <- st_drop_geometry(select(no_location, provider_id)) %>%
mutate(id = seq_len(nrow(.))) %>%
left_join(new_hl, by = "id") %>%
left_join(select(st_drop_geometry(all_gages),
provider_id, drainage_area_sqkm),
by = "provider_id") %>%
left_join(v2_area, by = c("COMID" = "nhdpv2_COMID")) %>%
mutate(da_diff = abs(drainage_area_sqkm - nhdpv2_totdasqkm))
# now look at everything where there is no prior COMID estimate
update_index <- which(is.na(dams$nhdpv2_COMID))

no_location <- dams[update_index, ]

linked_gages_dedup <- bind_rows(
linked_gages %>%
no_location <- sf::st_transform(no_location, 5070)

linked_dams <- utility_link_dams(no_location, dams, nhdpv2_fline, vaa,
search_radius_m, max_matches_in_radius) %>%
filter(is.na(da_diff) | da_diff < da_diff_thresh)

linked_dams_dedup <- bind_rows(
linked_dams %>%
group_by(provider_id) %>%
filter(is.na(da_diff)) %>%
filter(offset == min(offset)) %>%
ungroup(),
linked_gages %>%
ungroup() %>%
mutate(index_type = "automatic_network_closest_flowline_no_da_check"),
linked_dams %>%
group_by(provider_id) %>%
filter(!is.na(da_diff)) %>%
filter(da_diff == min(da_diff)) %>%
ungroup()) %>%
ungroup() %>%
mutate(index_type = "automatic_network_closest_drainage_area")) %>%
group_by(provider_id) %>%
filter(hydroseq == min(hydroseq)) %>%
filter(n() == 1) %>%
ungroup()

linked_gages <- select(no_location, provider_id) %>%
linked_dams <- select(no_location, provider_id) %>%
mutate(id = seq_len(nrow(.))) %>%
left_join(select(linked_gages_dedup,
id, COMID, REACHCODE, REACH_meas),
left_join(select(linked_dams_dedup,
id, COMID, REACHCODE, REACH_meas, index_type, drainage_area_sqkm_nhdpv2),
by = "id")

all_gages$nhdpv2_REACHCODE[update_index] <- linked_gages$REACHCODE
all_gages$nhdpv2_REACH_measure[update_index] <- linked_gages$REACH_meas
all_gages$nhdpv2_COMID[update_index] <- linked_gages$COMID
dams$nhdpv2_REACHCODE[update_index] <- linked_dams$REACHCODE
dams$nhdpv2_REACH_measure[update_index] <- linked_dams$REACH_meas
dams$nhdpv2_COMID[update_index] <- linked_dams$COMID
dams$index_type[update_index] <- linked_dams$index_type
dams$drainage_area_sqkm_nhdpv2[update_index] <- linked_dams$drainage_area_sqkm_nhdpv2

dams <- select(dams, -norm_diffda)

all_gages
dams
}

add_mainstems <- function(gage_hydrologic_locations, mainstems, vaa) {
mainstems <- mainstems[,c("id", "uri"), drop = TRUE]
mainstems$id <- as.integer(mainstems$id)
vaa <- right_join(vaa, mainstems, by = c("levelpathi" = "id"))

vaa <- vaa[,c("comid", "uri")]

names(vaa) <- c("comid", "mainstem_uri")
add_mainstems <- function(dam_hydrologic_locations, mainstems, vaa) {
mainstems <- mainstems[,c("head_nhdpv2_COMID", "uri"), drop = TRUE]

mainstems$head_nhdpv2_COMID <- as.integer(gsub("https://geoconnex.us/nhdplusv2/comid/", "",
mainstems$head_nhdpv2_COMID))

# stolen from ref_gages
mainstem_lookup <- group_by(vaa, levelpathi) |>
filter(hydroseq == max(hydroseq)) |>
ungroup() |>
select(head_nhdpv2_COMID = comid, levelpathi) |>
distinct() |>
left_join(mainstems, by = "head_nhdpv2_COMID") |>
filter(!is.na(uri)) |>
select(-head_nhdpv2_COMID) |>
right_join(select(vaa, comid, levelpathi),
by = "levelpathi") |>
select(-levelpathi, comid, mainstem_uri = uri)

dplyr::left_join(dam_hydrologic_locations, mainstem_lookup,
by = c("nhdpv2_COMID" = "comid"))

left_join(gage_hydrologic_locations, vaa,
by = c("nhdpv2_COMID" = "comid"))
}
13 changes: 13 additions & 0 deletions R/get_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,16 @@ get_nid_csv <- function(out_file = "data/nation.csv",
readr::read_csv(out_file, skip = 1)
}

get_all_mainstems <- function(outdir) {
url <- "https://www.hydroshare.org/resource/3cc04df349cd45f38e1637305c98529c/data/contents/mainstems.gpkg"

dir.create(outdir, recursive = TRUE, showWarnings = FALSE)

f <- file.path(outdir, basename(url))

if(!file.exists(f)) {
download.file(url, destfile = f, mode = "wb")
}

sf::read_sf(f)
}
17 changes: 16 additions & 1 deletion R/write_out.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@
write_reference <- function(dam_locations, registry, providers, reference_file, nldi_file) {

if(is.numeric(dam_locations$nhdpv2_COMID)) dam_locations$nhdpv2_COMID <-
vapply(dam_locations$nhdpv2_COMID, \(x) {
if(is.na(x)) NA_character_ else paste0("https://geoconnex.us/nhdplusv2/comid/", x)
}, FUN.VALUE = c(""))

if(all(is.na(dam_locations$nhdpv2_REACHCODE) | !grepl("https", dam_locations$nhdpv2_REACHCODE))) {
dam_locations$nhdpv2_REACHCODE_uri <-
vapply(dam_locations$nhdpv2_REACHCODE, \(x) {
if(is.na(x)) NA_character_ else paste0("https://geoconnex.us/nhdplusv2/reachcode/", x)
}, FUN.VALUE = c(""))
}

out <- dam_locations %>%
mutate(identifier = paste0(provider, provider_id)) %>%
left_join(select(convert_provider_id(registry, providers),
uri, identifier, id), by = "identifier") %>%
select(id, uri, name, description, subjectOf,
provider, provider_id, nhdpv2_COMID, feature_data_source) %>%
provider, provider_id,
nhdpv2_COMID, nhdpv2_REACHCODE_uri, nhdpv2_REACHCODE, nhdpv2_REACH_measure,
drainage_area_sqkm, drainage_area_sqkm_nhdpv2, index_type,
mainstem_uri, feature_data_source) %>%
mutate(id = as.integer(id))

write_sf(out, reference_file)
Expand Down
Loading

0 comments on commit ef318b5

Please sign in to comment.