Skip to content

Commit

Permalink
Merge pull request #272 from wdwatkins/NGWMN
Browse files Browse the repository at this point in the history
NGWMN getObservation
  • Loading branch information
ldecicco-USGS authored Aug 26, 2016
2 parents a58589f + d449ee1 commit 9c00e51
Show file tree
Hide file tree
Showing 9 changed files with 468 additions and 1 deletion.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@ export(constructWQPURL)
export(countyCd)
export(countyCdLookup)
export(getWebServiceData)
export(importNGWMN_wml2)
export(importRDB1)
export(importWQP)
export(importWaterML1)
export(importWaterML2)
export(pCodeToName)
export(parameterCdFile)
export(readNGWMNlevels)
export(readNGWMNsites)
export(readNWISdata)
export(readNWISdv)
export(readNWISgwl)
Expand Down Expand Up @@ -47,6 +50,7 @@ importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
importFrom(dplyr,full_join)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_)
importFrom(dplyr,mutate_each_)
importFrom(dplyr,rbind_all)
Expand Down
138 changes: 138 additions & 0 deletions R/importNGWMN_wml2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#' Function to return data from the National Ground Water Monitoring Network waterML2 format
#'
#' This function accepts a url parameter for a WaterML2 getObservation. This function is still under development,
#' but the general functionality is correct.
#'
#' @param input character or raw, containing the url for the retrieval or a path to the data file, or raw XML.
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, character
#' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the
#' datetimes to UTC (properly accounting for daylight savings times based on the data's provided time zone offset).
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
#' @return mergedDF a data frame source, time, value, uom, uomTitle, comment, gmlID
#' @export
#' @importFrom xml2 read_xml
#' @importFrom xml2 xml_find_all
#' @importFrom xml2 xml_text
#' @importFrom xml2 xml_attr
#' @importFrom lubridate parse_date_time
#' @examples
#' \dontrun{
#' url <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation&service=SOS&version=2.0.0
#' &observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOf
#' Interest=VW_GWDP_GEOSERVER.USGS.403836085374401"
#' data <- importNGWMN_wml2(url)
#'
#' url <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation&service=SOS&version=2.0.0
#' &observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOf
#' Interest=VW_GWDP_GEOSERVER.USGS.474011117072901"
#' data <- importNGWMN_wml2(url)
#' }
#'
importNGWMN_wml2 <- function(input, asDateTime=FALSE, tz=""){
if(tz != ""){
tz <- match.arg(tz, c("America/New_York","America/Chicago",
"America/Denver","America/Los_Angeles",
"America/Anchorage","America/Honolulu",
"America/Jamaica","America/Managua",
"America/Phoenix","America/Metlakatla"))
}else{tz = "UTC"}

raw <- FALSE
if(class(input) == "character" && file.exists(input)){
returnedDoc <- read_xml(input)
}else if(class(input) == 'raw'){
returnedDoc <- read_xml(input)
raw <- TRUE
} else {
returnedDoc <- xml_root(getWebServiceData(input, encoding='gzip'))
}

response <- xml_name(returnedDoc)
if(response == "GetObservationResponse"){

timeSeries <- xml_find_all(returnedDoc, "//wml2:MeasurementTimeseries") #each parameter/site combo

if(0 == length(timeSeries)){
df <- data.frame()
if(!raw){
attr(df, "url") <- input
}
return(df)
}

mergedDF <- NULL

for(t in timeSeries){
gmlID <- xml_attr(t,"id")
TVP <- xml_find_all(t, ".//wml2:MeasurementTVP")#time-value pairs
rawTime <- xml_text(xml_find_all(TVP,".//wml2:time"))

valueNodes <- xml_find_all(TVP,".//wml2:value")
values <- as.numeric(xml_text(valueNodes))
nVals <- length(values)
gmlID <- rep(gmlID, nVals)

#df of date, time, dateTime
oneCol <- rep(NA, nVals)
timeDF <- data.frame(date=oneCol, time=oneCol, dateTime=oneCol)
splitTime <- data.frame(matrix(unlist(strsplit(rawTime, "T")), nrow=nVals, byrow = TRUE), stringsAsFactors=FALSE)
names(splitTime) <- c("date", "time")
timeDF <- mutate(splitTime, dateTime = NA)
logicVec <- nchar(rawTime) > 19
timeDF$dateTime[logicVec] <- rawTime[logicVec]
if(asDateTime){
timeDF$dateTime <- parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S",
"%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
#^^setting tz in as.POSIXct just sets the attribute, does not convert the time!
attr(time, 'tzone') <- tz
}



uom <- xml_attr(valueNodes, "uom", default = NA)
source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title")
comment <- xml_text(xml_find_all(TVP, ".//wml2:comment"))

df <- cbind.data.frame(source, timeDF, value=values, uom, comment, gmlID,
stringsAsFactors=FALSE)
if (is.null(mergedDF)){
mergedDF <- df
} else {
similarNames <- intersect(colnames(mergedDF), colnames(df))
mergedDF <- full_join(mergedDF, df, by=similarNames)
}
}

if(!raw){
url <- input
attr(mergedDF, "url") <- url
}
mergedDF$date <- as.Date(mergedDF$date)
nonDateCols <- grep("date",names(mergedDF), value=TRUE, invert = TRUE)

mergedDF[nonDateCols][mergedDF[nonDateCols] == "" | mergedDF[nonDateCols]== -999999.0] <- NA
attr(mergedDF, "gml:identifier") <- xml_text(xml_find_all(returnedDoc, ".//gml:identifier"))
attr(mergedDF, "generationDate") <- xml_text(xml_find_all(returnedDoc, ".//wml2:generationDate"))


}else if(response == "GetFeatureOfInterestResponse"){
site <- xml_text(xml_find_all(returnedDoc,".//gml:identifier"))
site <- substring(site, 8)

#bandaid to work with only single site calls
#TODO: need better solution when bbox is added
siteDesc <- xml_text(xml_find_all(returnedDoc, ".//gml:description"))
if(length(siteDesc) == 0){
siteDesc <- NA
}

siteLocs <- strsplit(xml_text(xml_find_all(returnedDoc, ".//gml:pos")), " ")
siteLocs <- data.frame(dec_lat_va=as.numeric(siteLocs[[1]][1]), dec_lon_va=as.numeric(siteLocs[[1]][2]), stringsAsFactors = FALSE)
mergedDF <- cbind.data.frame(site, description = siteDesc, siteLocs, stringsAsFactors = FALSE)
}
else{
stop("Unrecognized response from the web service")
}
return(mergedDF)
}
209 changes: 209 additions & 0 deletions R/readNGWMNdata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
#' import data from the National Groundwater Monitoring Network \url{http://cida.usgs.gov/ngwmn/}.
#'
#' Only water level data is currently available through the web service.
#' @param asDateTime logical if \code{TRUE}, will convert times to POSIXct format. Currently defaults to
#' \code{FALSE} since time zone information is not included.
#' @param featureID character Vector of feature IDs in the formatted with agency code and site number
#' separated by a period, e.g. \code{USGS.404159100494601}.
#' @param service character Identifies which web service to access. Only \code{observation} is currently
#' supported, which retrieves all water level for each site.
#' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the
#' datetimes to UTC (properly accounting for daylight savings times based on the data's provided time zone offset).
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
#' @import utils
#' @importFrom dplyr mutate
#' @importFrom dplyr bind_rows
#'
#' @examples
#' \dontrun{
#' #one site
#' site <- "USGS.430427089284901"
#' oneSite <- readNGWMNdata(featureID = site)
#'
#' #multiple sites
#' sites <- c("USGS.272838082142201","USGS.404159100494601", "USGS.401216080362703")
#' multiSiteData <- readNGWMNdata(sites)
#'
#' #non-USGS site
#' site <- "MBMG.892195"
#' data <- readNGWMNdata(featureID = site)
#'
#' #site with no data returns empty data frame
#' noDataSite <- "UTGS.401544112060301"
#' noDataSite <- readNGWMNdata(featureID = noDataSite, service = "observation")
#' }
#'

readNGWMNdata <- function(featureID, service = "observation", asDateTime = TRUE, tz = ""){
message(" ********************************************************
DISCLAIMER: NGWMN retrieval functions are still in flux,
and no future behavior or output is guaranteed
*********************************************************")
match.arg(service, c("observation", "featureOfInterest"))


if(service == "observation"){
allObs <- NULL
allAttrs <- NULL
allSites <- NULL
#these attributes are pulled out and saved when doing binds to be reattached
attrs <- c("url","gml:identifier","generationDate")
for(f in featureID){
obsFID <- retrieveObservation(f, asDateTime, attrs)
siteFID <- retrieveFeatureOfInterest(f, asDateTime)
if(is.null(allObs)){
allObs <- obsFID
allSites <- siteFID
allAttrs <- saveAttrs(attrs, allObs)
}else{
obsFIDatt <- saveAttrs(attrs, obsFID)
obsFID <- removeAttrs(attrs, obsFID)
allAttrs <- bind_rows(allAttrs, obsFIDatt)
allObs <- bind_rows(allObs, obsFID)
allSites <- bind_rows(allSites, siteFID)
}
attributes(allObs) <- c(attributes(allObs),as.list(allAttrs))
attr(allObs, "siteInfo") <- allSites
returnData <- allObs
}
}else if(service == "featureOfInterest"){
allSites <- NULL
for(f in featureID){
siteFID <- retrieveFeatureOfInterest(f, asDateTime)
if(is.null(allSites)){
allSites <- siteFID
}else{
allSites <- bind_rows(allSites, siteFID)
}
}
returnData <- allSites
}else{
stop("unrecognized service request")
}

return(returnData)
}

#' Retrieve groundwater levels from the National Ground Water Monitoring Network \url{http://cida.usgs.gov/ngwmn/}.
#'
#' @param featureID character Vector of feature IDs in the formatted with agency code and site number
#' separated by a period, e.g. \code{USGS.404159100494601}.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' #one site
#' site <- "USGS.430427089284901"
#' oneSite <- readNGWMNlevels(featureID = site)
#'
#' #multiple sites
#' sites <- c("USGS.272838082142201","USGS.404159100494601", "USGS.401216080362703")
#' multiSiteData <- readNGWMNlevels(sites)
#'
#' #non-USGS site
#' site <- "MBMG.892195"
#' data <- readNGWMNlevels(featureID = site)
#'
#' #site with no data returns empty data frame
#' noDataSite <- "UTGS.401544112060301"
#' noDataSite <- readNGWMNlevels(featureID = noDataSite)
#' }

readNGWMNlevels <- function(featureID){
data <- readNGWMNdata(featureID, service = "observation")
return(data)
}

#' Retrieve site data from the National Ground Water Monitoring Network \url{http://cida.usgs.gov/ngwmn/}.
#'
#' @param featureID character Vector of feature IDs in the formatted with agency code and site number
#' separated by a period, e.g. \code{USGS.404159100494601}.
#'
#' @export
#' @return A data frame the following columns:
#' #' \tabular{lll}{
#' Name \tab Type \tab Description \cr
#' site \tab char \tab Site FID \cr
#' description \tab char \tab Site description \cr
#' dec_lat_va, dec_lon_va \tab numeric \tab Site latitude and longitude \cr
#' }
#' @examples
#' \dontrun{
#' #one site
#' site <- "USGS.430427089284901"
#' oneSite <- readNGWMNsites(featureID = site)
#'
#' #multiple sites
#' sites <- c("USGS.272838082142201","USGS.404159100494601", "USGS.401216080362703")
#' multiSiteInfo <- readNGWMNsites(sites)
#'
#' #non-USGS site
#' site <- "MBMG.892195"
#' siteInfo <- readNGWMNsites(featureID = site)
#'
#' }

readNGWMNsites <- function(featureID){
sites <- readNGWMNdata(featureID, service = "featureOfInterest")
return(sites)
}



retrieveObservation <- function(featureID, asDateTime, attrs){
#will need to contruct this more piece by piece if other versions, properties are added
baseURL <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation&service=SOS&version=2.0.0&observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOfInterest=VW_GWDP_GEOSERVER."
url <- paste0(baseURL, featureID)

returnData <- importNGWMN_wml2(url, asDateTime)
if(nrow(returnData) == 0){
#need to add NA attributes, so they aren't messed up when stored as DFs
attr(returnData, "gml:identifier") <- NA
attr(returnData, "generationDate") <- NA
}

#mutate removes the attributes, need to save and append
attribs <- saveAttrs(attrs, returnData)
if(nrow(returnData) > 0){
#tack on site number
siteNum <- rep(sub('.*\\.', '', featureID), nrow(returnData))
returnData <- mutate(returnData, site = siteNum)
numCol <- ncol(returnData)
returnData <- returnData[,c(numCol,1:(numCol - 1))] #move siteNum to the left
}
attributes(returnData) <- c(attributes(returnData), as.list(attribs))

return(returnData)
}

#retrieve feature of interest
#don't expose until can support bbox
#note: import function can only do single sites right now
retrieveFeatureOfInterest <- function(featureID, asDateTime){
baseURL <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetFeatureOfInterest&service=SOS&version=2.0.0&observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOfInterest=VW_GWDP_GEOSERVER."
url <- paste0(baseURL, featureID)
siteDF <- importNGWMN_wml2(url, asDateTime)
return(siteDF)
}


#save specified attributes from a data frame
saveAttrs <- function(attrs, df){
attribs <- sapply(attrs, function(x) attr(df, x))
if(is.vector(attribs)){
toReturn <- as.data.frame(t(attribs), stringsAsFactors = FALSE)
}else{ #don't need to transpose
toReturn <- as.data.frame(attribs, stringsAsFactors = FALSE)
}
return(toReturn)
}

#strip specified attributes from a data frame
removeAttrs <- function(attrs, df){
for(a in attrs){
attr(df, a) <- NULL
}
return(df)
}
2 changes: 1 addition & 1 deletion inst/extdata/WaterML1Example.xml

Large diffs are not rendered by default.

Loading

0 comments on commit 9c00e51

Please sign in to comment.