Skip to content

Commit

Permalink
Merge pull request #155 from ldecicco-USGS/master
Browse files Browse the repository at this point in the history
Clean up getWebService calls
  • Loading branch information
ldecicco-USGS committed Dec 4, 2015
2 parents b8d3388 + cb6ffa5 commit 597f49e
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 125 deletions.
5 changes: 3 additions & 2 deletions R/getWebServiceData.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#' \code{\link[RCurl]{getURI}} with more informative error messages.
#'
#' @param obs_url character containing the url for the retrieval
#' @param \dots information to pass to header request
#' @import RCurl
#' @export
#' @return raw data from web services
Expand All @@ -17,10 +18,10 @@
#' \dontrun{
#' rawData <- getWebServiceData(obs_url)
#' }
getWebServiceData <- function(obs_url){
getWebServiceData <- function(obs_url, ...){
possibleError <- tryCatch({
h <- basicHeaderGatherer()
returnedDoc <- getURI(obs_url, headerfunction = h$update, encoding='gzip')
returnedDoc <- getURI(obs_url, headerfunction = h$update, ...)
}, warning = function(w) {
warning(w, "with url:", obs_url)
}, error = function(e) {
Expand Down
2 changes: 1 addition & 1 deletion R/importRDB1.r
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){
if(file.exists(obs_url)){
doc <- obs_url
} else {
doc <- getWebServiceData(obs_url)
doc <- getWebServiceData(obs_url, encoding='gzip')
if("warn" %in% names(attr(doc,"header"))){
data <- data.frame()
attr(data, "header") <- attr(doc,"header")
Expand Down
167 changes: 83 additions & 84 deletions R/importWQP.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,107 +50,106 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
"America/Jamaica","America/Managua",
"America/Phoenix","America/Metlakatla"))
}


if(!file.exists(obs_url)){
h <- basicHeaderGatherer()
httpHEAD(obs_url, headerfunction = h$update)

headerInfo <- h$value()


if(headerInfo['status'] == "200"){
if(zip){
temp <- tempfile()
options(timeout = 120)

possibleError <- tryCatch({
suppressWarnings(download.file(obs_url,temp, quiet=TRUE, mode='wb'))
},
error = function(e) {
stop(e, "with url:", obs_url)
}
)
doc <- temp

} else {
doc <- getWebServiceData(obs_url)
headerInfo <- attr(doc, "headerInfo")

numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"])
sitesToBeReturned <- as.numeric(headerInfo["Total-Site-Count"])

totalReturned <- sum(numToBeReturned, sitesToBeReturned,na.rm = TRUE)

if(is.na(numToBeReturned) | numToBeReturned == 0){
if(is.na(totalReturned) | totalReturned == 0){
for(i in grep("Warning",names(headerInfo))){
warning(headerInfo[i])
}
return(data.frame())
}

if(zip){
temp <- tempfile()
options(timeout = 120)

possibleError <- tryCatch({
suppressWarnings(download.file(obs_url,temp, quiet=TRUE, mode='wb'))
},
error = function(e) {
stop(e, "with url:", obs_url)
}
)
doc <- unzip(temp)
retval <- read_delim(doc,
col_types = cols(`ActivityStartTime/Time` = col_character(),
`ActivityEndTime/Time` = col_character(),
USGSPCode = col_character(),
ResultCommentText=col_character(),
`ActivityDepthHeightMeasure/MeasureValue` = col_number(),
`DetectionQuantitationLimitMeasure/MeasureValue` = col_number(),
ResultMeasureValue = col_number()),
quote = "", delim = "\t")
unlink(doc)
} else {
retval <- read_delim(obs_url,
col_types = cols(`ActivityStartTime/Time` = col_character(),
`ActivityEndTime/Time` = col_character(),
USGSPCode = col_character(),
ResultCommentText=col_character(),
`ActivityDepthHeightMeasure/MeasureValue` = col_number(),
`DetectionQuantitationLimitMeasure/MeasureValue` = col_number(),
ResultMeasureValue = col_number()),
quote = "", delim = "\t")
}
} else {
stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
}
}

} else {
doc <- obs_url
}

if(zip){
doc <- unzip(doc)
retval <- suppressWarnings(read_delim(doc,
col_types = cols(`ActivityStartTime/Time` = col_character(),
`ActivityEndTime/Time` = col_character(),
USGSPCode = col_character(),
ResultCommentText=col_character(),
`ActivityDepthHeightMeasure/MeasureValue` = col_number(),
`DetectionQuantitationLimitMeasure/MeasureValue` = col_number(),
ResultMeasureValue = col_number(),
`WellDepthMeasure/MeasureValue` = col_number(),
`WellHoleDepthMeasure/MeasureValue` = col_number(),
`HUCEightDigitCode` = col_character()),
quote = "", delim = "\t"))
unlink(doc)
} else {
retval <- suppressWarnings(read_delim(doc,
col_types = cols(`ActivityStartTime/Time` = col_character(),
`ActivityEndTime/Time` = col_character(),
USGSPCode = col_character(),
ResultCommentText=col_character(),
`ActivityDepthHeightMeasure/MeasureValue` = col_number(),
`DetectionQuantitationLimitMeasure/MeasureValue` = col_number(),
ResultMeasureValue = col_number(),
`WellDepthMeasure/MeasureValue` = col_number(),
`WellHoleDepthMeasure/MeasureValue` = col_number(),
`HUCEightDigitCode` = col_character()),
quote = "", delim = "\t"))
}

if(!file.exists(obs_url) & !zip){
actualNumReturned <- nrow(retval)
if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned")

} else {
if(actualNumReturned != numToBeReturned & actualNumReturned != sitesToBeReturned){
warning(totalReturned, " sample results were expected, ", actualNumReturned, " were returned")
}
}

if(length(grep("ActivityStartTime",names(retval))) > 0){

if(zip){
doc <- unzip(obs_url)
retval <- read_delim(obs_url,
col_types = cols(`ActivityStartTime/Time` = col_character(),
`ActivityEndTime/Time` = col_character(),
USGSPCode = col_character(),
ResultCommentText=col_character()),
quote = "", delim = "\t")
unlink(doc)
} else {
retval <- read_delim(obs_url,
col_types = cols(`ActivityStartTime/Time` = col_character(),
`ActivityEndTime/Time` = col_character(),
USGSPCode = col_character(),
ResultCommentText=col_character()),
quote = "", delim = "\t")
}

offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0),
code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","", NA),
stringsAsFactors = FALSE)

retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime/TimeZoneCode"="code"))
names(retval)[names(retval) == "offset"] <- "timeZoneStart"
retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime/TimeZoneCode"="code"))
names(retval)[names(retval) == "offset"] <- "timeZoneEnd"

dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate")

retval <- suppressWarnings(mutate_each_(retval, ~as.Date(parse_date_time(., c("Ymd", "mdY"))), dateCols))

retval <- mutate_(retval, ActivityStartDateTime=~paste(ActivityStartDate, `ActivityStartTime/Time`))
retval <- mutate_(retval, ActivityEndDateTime=~paste(ActivityEndDate, `ActivityEndTime/Time`))

retval <- mutate_(retval, ActivityStartDateTime=~fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)
retval <- mutate_(retval, ActivityEndDateTime=~fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)

retval <- select_(retval, ~-timeZoneEnd, ~-timeZoneStart)
}

offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0),
code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","", NA),
stringsAsFactors = FALSE)

retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime/TimeZoneCode"="code"))
names(retval)[names(retval) == "offset"] <- "timeZoneStart"
retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime/TimeZoneCode"="code"))
names(retval)[names(retval) == "offset"] <- "timeZoneEnd"

dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate")

retval <- suppressWarnings(mutate_each_(retval, ~as.Date(parse_date_time(., c("Ymd", "mdY"))), dateCols))

retval <- mutate_(retval, ActivityStartDateTime=~paste(ActivityStartDate, `ActivityStartTime/Time`))
retval <- mutate_(retval, ActivityEndDateTime=~paste(ActivityEndDate, `ActivityEndTime/Time`))

retval <- mutate_(retval, ActivityStartDateTime=~fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)
retval <- mutate_(retval, ActivityEndDateTime=~fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)

retval <- select_(retval, ~-timeZoneEnd, ~-timeZoneStart)
names(retval)[grep("/",names(retval))] <- gsub("/",".",names(retval)[grep("/",names(retval))])

return(retval)
Expand Down
2 changes: 1 addition & 1 deletion R/importWaterML1.r
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
if(file.exists(obs_url)){
rawData <- obs_url
} else {
rawData <- getWebServiceData(obs_url)
rawData <- getWebServiceData(obs_url, encoding='gzip')
}

returnedDoc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE)
Expand Down
2 changes: 1 addition & 1 deletion R/whatNWISsites.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ whatNWISsites <- function(...){
urlCall <- drURL('waterservices',Access=pkg.env$access, format="mapper", arg.list = values)


rawData <- getWebServiceData(urlCall)
rawData <- getWebServiceData(urlCall, encoding='gzip')


doc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE)
Expand Down
39 changes: 4 additions & 35 deletions R/whatWQPsites.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,41 +98,10 @@ whatWQPsites <- function(...){
urlCall,
"&mimeType=tsv&sorted=no",sep = "")

doc <- getWebServiceData(urlCall)
headerInfo <- attr(doc, "headerInfo")
numToBeReturned <- as.numeric(headerInfo["Total-Site-Count"])
retval <- importWQP(urlCall)
retval$queryTime <- Sys.time()

return(retval)

if (!is.na(numToBeReturned) & numToBeReturned != 0){

retval <- read.delim(textConnection(doc), header = TRUE,
dec=".", sep='\t', quote="",
colClasses=c('character'),
fill = TRUE)
actualNumReturned <- nrow(retval)

if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sites were expected, ", actualNumReturned, " were returned")

if("LatitudeMeasure" %in% names(retval)){
retval$LatitudeMeasure <- as.numeric(retval$LatitudeMeasure)
}

if("LongitudeMeasure" %in% names(retval)){
retval$LongitudeMeasure <- as.numeric(retval$LongitudeMeasure)
}

retval$queryTime <- Sys.time()

return(retval)

} else {
if(headerInfo['Total-Site-Count'] == "0"){
warning("No data returned")
}

for(i in grep("Warning",names(headerInfo))){
warning(headerInfo[i])
}
}

}
4 changes: 3 additions & 1 deletion man/getWebServiceData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 597f49e

Please sign in to comment.