-
Notifications
You must be signed in to change notification settings - Fork 84
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #272 from wdwatkins/NGWMN
NGWMN getObservation
- Loading branch information
Showing
9 changed files
with
468 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
#' Function to return data from the National Ground Water Monitoring Network waterML2 format | ||
#' | ||
#' This function accepts a url parameter for a WaterML2 getObservation. This function is still under development, | ||
#' but the general functionality is correct. | ||
#' | ||
#' @param input character or raw, containing the url for the retrieval or a path to the data file, or raw XML. | ||
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, character | ||
#' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the | ||
#' datetimes to UTC (properly accounting for daylight savings times based on the data's provided time zone offset). | ||
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", | ||
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla" | ||
#' @return mergedDF a data frame source, time, value, uom, uomTitle, comment, gmlID | ||
#' @export | ||
#' @importFrom xml2 read_xml | ||
#' @importFrom xml2 xml_find_all | ||
#' @importFrom xml2 xml_text | ||
#' @importFrom xml2 xml_attr | ||
#' @importFrom lubridate parse_date_time | ||
#' @examples | ||
#' \dontrun{ | ||
#' url <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation&service=SOS&version=2.0.0 | ||
#' &observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOf | ||
#' Interest=VW_GWDP_GEOSERVER.USGS.403836085374401" | ||
#' data <- importNGWMN_wml2(url) | ||
#' | ||
#' url <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation&service=SOS&version=2.0.0 | ||
#' &observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOf | ||
#' Interest=VW_GWDP_GEOSERVER.USGS.474011117072901" | ||
#' data <- importNGWMN_wml2(url) | ||
#' } | ||
#' | ||
importNGWMN_wml2 <- function(input, asDateTime=FALSE, tz=""){ | ||
if(tz != ""){ | ||
tz <- match.arg(tz, c("America/New_York","America/Chicago", | ||
"America/Denver","America/Los_Angeles", | ||
"America/Anchorage","America/Honolulu", | ||
"America/Jamaica","America/Managua", | ||
"America/Phoenix","America/Metlakatla")) | ||
}else{tz = "UTC"} | ||
|
||
raw <- FALSE | ||
if(class(input) == "character" && file.exists(input)){ | ||
returnedDoc <- read_xml(input) | ||
}else if(class(input) == 'raw'){ | ||
returnedDoc <- read_xml(input) | ||
raw <- TRUE | ||
} else { | ||
returnedDoc <- xml_root(getWebServiceData(input, encoding='gzip')) | ||
} | ||
|
||
response <- xml_name(returnedDoc) | ||
if(response == "GetObservationResponse"){ | ||
|
||
timeSeries <- xml_find_all(returnedDoc, "//wml2:MeasurementTimeseries") #each parameter/site combo | ||
|
||
if(0 == length(timeSeries)){ | ||
df <- data.frame() | ||
if(!raw){ | ||
attr(df, "url") <- input | ||
} | ||
return(df) | ||
} | ||
|
||
mergedDF <- NULL | ||
|
||
for(t in timeSeries){ | ||
gmlID <- xml_attr(t,"id") | ||
TVP <- xml_find_all(t, ".//wml2:MeasurementTVP")#time-value pairs | ||
rawTime <- xml_text(xml_find_all(TVP,".//wml2:time")) | ||
|
||
valueNodes <- xml_find_all(TVP,".//wml2:value") | ||
values <- as.numeric(xml_text(valueNodes)) | ||
nVals <- length(values) | ||
gmlID <- rep(gmlID, nVals) | ||
|
||
#df of date, time, dateTime | ||
oneCol <- rep(NA, nVals) | ||
timeDF <- data.frame(date=oneCol, time=oneCol, dateTime=oneCol) | ||
splitTime <- data.frame(matrix(unlist(strsplit(rawTime, "T")), nrow=nVals, byrow = TRUE), stringsAsFactors=FALSE) | ||
names(splitTime) <- c("date", "time") | ||
timeDF <- mutate(splitTime, dateTime = NA) | ||
logicVec <- nchar(rawTime) > 19 | ||
timeDF$dateTime[logicVec] <- rawTime[logicVec] | ||
if(asDateTime){ | ||
timeDF$dateTime <- parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S", | ||
"%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE) | ||
#^^setting tz in as.POSIXct just sets the attribute, does not convert the time! | ||
attr(time, 'tzone') <- tz | ||
} | ||
|
||
|
||
|
||
uom <- xml_attr(valueNodes, "uom", default = NA) | ||
source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title") | ||
comment <- xml_text(xml_find_all(TVP, ".//wml2:comment")) | ||
|
||
df <- cbind.data.frame(source, timeDF, value=values, uom, comment, gmlID, | ||
stringsAsFactors=FALSE) | ||
if (is.null(mergedDF)){ | ||
mergedDF <- df | ||
} else { | ||
similarNames <- intersect(colnames(mergedDF), colnames(df)) | ||
mergedDF <- full_join(mergedDF, df, by=similarNames) | ||
} | ||
} | ||
|
||
if(!raw){ | ||
url <- input | ||
attr(mergedDF, "url") <- url | ||
} | ||
mergedDF$date <- as.Date(mergedDF$date) | ||
nonDateCols <- grep("date",names(mergedDF), value=TRUE, invert = TRUE) | ||
|
||
mergedDF[nonDateCols][mergedDF[nonDateCols] == "" | mergedDF[nonDateCols]== -999999.0] <- NA | ||
attr(mergedDF, "gml:identifier") <- xml_text(xml_find_all(returnedDoc, ".//gml:identifier")) | ||
attr(mergedDF, "generationDate") <- xml_text(xml_find_all(returnedDoc, ".//wml2:generationDate")) | ||
|
||
|
||
}else if(response == "GetFeatureOfInterestResponse"){ | ||
site <- xml_text(xml_find_all(returnedDoc,".//gml:identifier")) | ||
site <- substring(site, 8) | ||
|
||
#bandaid to work with only single site calls | ||
#TODO: need better solution when bbox is added | ||
siteDesc <- xml_text(xml_find_all(returnedDoc, ".//gml:description")) | ||
if(length(siteDesc) == 0){ | ||
siteDesc <- NA | ||
} | ||
|
||
siteLocs <- strsplit(xml_text(xml_find_all(returnedDoc, ".//gml:pos")), " ") | ||
siteLocs <- data.frame(dec_lat_va=as.numeric(siteLocs[[1]][1]), dec_lon_va=as.numeric(siteLocs[[1]][2]), stringsAsFactors = FALSE) | ||
mergedDF <- cbind.data.frame(site, description = siteDesc, siteLocs, stringsAsFactors = FALSE) | ||
} | ||
else{ | ||
stop("Unrecognized response from the web service") | ||
} | ||
return(mergedDF) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
#' import data from the National Groundwater Monitoring Network \url{http://cida.usgs.gov/ngwmn/}. | ||
#' | ||
#' Only water level data is currently available through the web service. | ||
#' @param asDateTime logical if \code{TRUE}, will convert times to POSIXct format. Currently defaults to | ||
#' \code{FALSE} since time zone information is not included. | ||
#' @param featureID character Vector of feature IDs in the formatted with agency code and site number | ||
#' separated by a period, e.g. \code{USGS.404159100494601}. | ||
#' @param service character Identifies which web service to access. Only \code{observation} is currently | ||
#' supported, which retrieves all water level for each site. | ||
#' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the | ||
#' datetimes to UTC (properly accounting for daylight savings times based on the data's provided time zone offset). | ||
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", | ||
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla" | ||
#' @import utils | ||
#' @importFrom dplyr mutate | ||
#' @importFrom dplyr bind_rows | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' #one site | ||
#' site <- "USGS.430427089284901" | ||
#' oneSite <- readNGWMNdata(featureID = site) | ||
#' | ||
#' #multiple sites | ||
#' sites <- c("USGS.272838082142201","USGS.404159100494601", "USGS.401216080362703") | ||
#' multiSiteData <- readNGWMNdata(sites) | ||
#' | ||
#' #non-USGS site | ||
#' site <- "MBMG.892195" | ||
#' data <- readNGWMNdata(featureID = site) | ||
#' | ||
#' #site with no data returns empty data frame | ||
#' noDataSite <- "UTGS.401544112060301" | ||
#' noDataSite <- readNGWMNdata(featureID = noDataSite, service = "observation") | ||
#' } | ||
#' | ||
|
||
readNGWMNdata <- function(featureID, service = "observation", asDateTime = TRUE, tz = ""){ | ||
message(" ******************************************************** | ||
DISCLAIMER: NGWMN retrieval functions are still in flux, | ||
and no future behavior or output is guaranteed | ||
*********************************************************") | ||
match.arg(service, c("observation", "featureOfInterest")) | ||
|
||
|
||
if(service == "observation"){ | ||
allObs <- NULL | ||
allAttrs <- NULL | ||
allSites <- NULL | ||
#these attributes are pulled out and saved when doing binds to be reattached | ||
attrs <- c("url","gml:identifier","generationDate") | ||
for(f in featureID){ | ||
obsFID <- retrieveObservation(f, asDateTime, attrs) | ||
siteFID <- retrieveFeatureOfInterest(f, asDateTime) | ||
if(is.null(allObs)){ | ||
allObs <- obsFID | ||
allSites <- siteFID | ||
allAttrs <- saveAttrs(attrs, allObs) | ||
}else{ | ||
obsFIDatt <- saveAttrs(attrs, obsFID) | ||
obsFID <- removeAttrs(attrs, obsFID) | ||
allAttrs <- bind_rows(allAttrs, obsFIDatt) | ||
allObs <- bind_rows(allObs, obsFID) | ||
allSites <- bind_rows(allSites, siteFID) | ||
} | ||
attributes(allObs) <- c(attributes(allObs),as.list(allAttrs)) | ||
attr(allObs, "siteInfo") <- allSites | ||
returnData <- allObs | ||
} | ||
}else if(service == "featureOfInterest"){ | ||
allSites <- NULL | ||
for(f in featureID){ | ||
siteFID <- retrieveFeatureOfInterest(f, asDateTime) | ||
if(is.null(allSites)){ | ||
allSites <- siteFID | ||
}else{ | ||
allSites <- bind_rows(allSites, siteFID) | ||
} | ||
} | ||
returnData <- allSites | ||
}else{ | ||
stop("unrecognized service request") | ||
} | ||
|
||
return(returnData) | ||
} | ||
|
||
#' Retrieve groundwater levels from the National Ground Water Monitoring Network \url{http://cida.usgs.gov/ngwmn/}. | ||
#' | ||
#' @param featureID character Vector of feature IDs in the formatted with agency code and site number | ||
#' separated by a period, e.g. \code{USGS.404159100494601}. | ||
#' | ||
#' @export | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' #one site | ||
#' site <- "USGS.430427089284901" | ||
#' oneSite <- readNGWMNlevels(featureID = site) | ||
#' | ||
#' #multiple sites | ||
#' sites <- c("USGS.272838082142201","USGS.404159100494601", "USGS.401216080362703") | ||
#' multiSiteData <- readNGWMNlevels(sites) | ||
#' | ||
#' #non-USGS site | ||
#' site <- "MBMG.892195" | ||
#' data <- readNGWMNlevels(featureID = site) | ||
#' | ||
#' #site with no data returns empty data frame | ||
#' noDataSite <- "UTGS.401544112060301" | ||
#' noDataSite <- readNGWMNlevels(featureID = noDataSite) | ||
#' } | ||
|
||
readNGWMNlevels <- function(featureID){ | ||
data <- readNGWMNdata(featureID, service = "observation") | ||
return(data) | ||
} | ||
|
||
#' Retrieve site data from the National Ground Water Monitoring Network \url{http://cida.usgs.gov/ngwmn/}. | ||
#' | ||
#' @param featureID character Vector of feature IDs in the formatted with agency code and site number | ||
#' separated by a period, e.g. \code{USGS.404159100494601}. | ||
#' | ||
#' @export | ||
#' @return A data frame the following columns: | ||
#' #' \tabular{lll}{ | ||
#' Name \tab Type \tab Description \cr | ||
#' site \tab char \tab Site FID \cr | ||
#' description \tab char \tab Site description \cr | ||
#' dec_lat_va, dec_lon_va \tab numeric \tab Site latitude and longitude \cr | ||
#' } | ||
#' @examples | ||
#' \dontrun{ | ||
#' #one site | ||
#' site <- "USGS.430427089284901" | ||
#' oneSite <- readNGWMNsites(featureID = site) | ||
#' | ||
#' #multiple sites | ||
#' sites <- c("USGS.272838082142201","USGS.404159100494601", "USGS.401216080362703") | ||
#' multiSiteInfo <- readNGWMNsites(sites) | ||
#' | ||
#' #non-USGS site | ||
#' site <- "MBMG.892195" | ||
#' siteInfo <- readNGWMNsites(featureID = site) | ||
#' | ||
#' } | ||
|
||
readNGWMNsites <- function(featureID){ | ||
sites <- readNGWMNdata(featureID, service = "featureOfInterest") | ||
return(sites) | ||
} | ||
|
||
|
||
|
||
retrieveObservation <- function(featureID, asDateTime, attrs){ | ||
#will need to contruct this more piece by piece if other versions, properties are added | ||
baseURL <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation&service=SOS&version=2.0.0&observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOfInterest=VW_GWDP_GEOSERVER." | ||
url <- paste0(baseURL, featureID) | ||
|
||
returnData <- importNGWMN_wml2(url, asDateTime) | ||
if(nrow(returnData) == 0){ | ||
#need to add NA attributes, so they aren't messed up when stored as DFs | ||
attr(returnData, "gml:identifier") <- NA | ||
attr(returnData, "generationDate") <- NA | ||
} | ||
|
||
#mutate removes the attributes, need to save and append | ||
attribs <- saveAttrs(attrs, returnData) | ||
if(nrow(returnData) > 0){ | ||
#tack on site number | ||
siteNum <- rep(sub('.*\\.', '', featureID), nrow(returnData)) | ||
returnData <- mutate(returnData, site = siteNum) | ||
numCol <- ncol(returnData) | ||
returnData <- returnData[,c(numCol,1:(numCol - 1))] #move siteNum to the left | ||
} | ||
attributes(returnData) <- c(attributes(returnData), as.list(attribs)) | ||
|
||
return(returnData) | ||
} | ||
|
||
#retrieve feature of interest | ||
#don't expose until can support bbox | ||
#note: import function can only do single sites right now | ||
retrieveFeatureOfInterest <- function(featureID, asDateTime){ | ||
baseURL <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetFeatureOfInterest&service=SOS&version=2.0.0&observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOfInterest=VW_GWDP_GEOSERVER." | ||
url <- paste0(baseURL, featureID) | ||
siteDF <- importNGWMN_wml2(url, asDateTime) | ||
return(siteDF) | ||
} | ||
|
||
|
||
#save specified attributes from a data frame | ||
saveAttrs <- function(attrs, df){ | ||
attribs <- sapply(attrs, function(x) attr(df, x)) | ||
if(is.vector(attribs)){ | ||
toReturn <- as.data.frame(t(attribs), stringsAsFactors = FALSE) | ||
}else{ #don't need to transpose | ||
toReturn <- as.data.frame(attribs, stringsAsFactors = FALSE) | ||
} | ||
return(toReturn) | ||
} | ||
|
||
#strip specified attributes from a data frame | ||
removeAttrs <- function(attrs, df){ | ||
for(a in attrs){ | ||
attr(df, a) <- NULL | ||
} | ||
return(df) | ||
} |
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.