From d08c0ea793283d47a3209e1d81e5c7c3f9b76268 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Fri, 4 Dec 2015 12:04:36 -0600 Subject: [PATCH 1/2] Clean up header checks on WQP. --- R/getWebServiceData.R | 5 +- R/importRDB1.r | 2 +- R/importWQP.R | 171 +++++++++++++++++++-------------------- R/whatWQPsites.R | 39 +-------- man/getWebServiceData.Rd | 4 +- 5 files changed, 95 insertions(+), 126 deletions(-) diff --git a/R/getWebServiceData.R b/R/getWebServiceData.R index 8977039d..e016adcb 100644 --- a/R/getWebServiceData.R +++ b/R/getWebServiceData.R @@ -4,6 +4,7 @@ #' \code{\link[RCurl]{getURI}} with more informative error messages. #' #' @param obs_url character containing the url for the retrieval +#' @param \dots information to pass to header request #' @import RCurl #' @export #' @return raw data from web services @@ -17,10 +18,10 @@ #' \dontrun{ #' rawData <- getWebServiceData(obs_url) #' } -getWebServiceData <- function(obs_url){ +getWebServiceData <- function(obs_url, ...){ possibleError <- tryCatch({ h <- basicHeaderGatherer() - returnedDoc <- getURI(obs_url, headerfunction = h$update, encoding='gzip') + returnedDoc <- getURI(obs_url, headerfunction = h$update, ...) }, warning = function(w) { warning(w, "with url:", obs_url) }, error = function(e) { diff --git a/R/importRDB1.r b/R/importRDB1.r index 57f7561c..d584f096 100644 --- a/R/importRDB1.r +++ b/R/importRDB1.r @@ -98,7 +98,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){ if(file.exists(obs_url)){ doc <- obs_url } else { - doc <- getWebServiceData(obs_url) + doc <- getWebServiceData(obs_url, encoding='gzip') if("warn" %in% names(attr(doc,"header"))){ data <- data.frame() attr(data, "header") <- attr(doc,"header") diff --git a/R/importWQP.R b/R/importWQP.R index ebff7ebd..10a5a134 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -50,107 +50,104 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ "America/Jamaica","America/Managua", "America/Phoenix","America/Metlakatla")) } + if(!file.exists(obs_url)){ - h <- basicHeaderGatherer() - httpHEAD(obs_url, headerfunction = h$update) + doc <- getWebServiceData(obs_url) + headerInfo <- attr(doc, "headerInfo") + + numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"]) + sitesToBeReturned <- as.numeric(headerInfo["Total-Site-Count"]) - headerInfo <- h$value() + totalReturned <- sum(numToBeReturned, sitesToBeReturned,na.rm = TRUE) + if(is.na(totalReturned) | totalReturned == 0){ + for(i in grep("Warning",names(headerInfo))){ + warning(headerInfo[i]) + } + return(data.frame()) + } - if(headerInfo['status'] == "200"){ - - numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"]) + if(zip){ + temp <- tempfile() + options(timeout = 120) - if(is.na(numToBeReturned) | numToBeReturned == 0){ - for(i in grep("Warning",names(headerInfo))){ - warning(headerInfo[i]) + possibleError <- tryCatch({ + suppressWarnings(download.file(obs_url,temp, quiet=TRUE, mode='wb')) + }, + error = function(e) { + stop(e, "with url:", obs_url) } - return(data.frame()) - } - - if(zip){ - temp <- tempfile() - options(timeout = 120) - - possibleError <- tryCatch({ - suppressWarnings(download.file(obs_url,temp, quiet=TRUE, mode='wb')) - }, - error = function(e) { - stop(e, "with url:", obs_url) - } - ) - doc <- unzip(temp) - retval <- read_delim(doc, - col_types = cols(`ActivityStartTime/Time` = col_character(), - `ActivityEndTime/Time` = col_character(), - USGSPCode = col_character(), - ResultCommentText=col_character(), - `ActivityDepthHeightMeasure/MeasureValue` = col_number(), - `DetectionQuantitationLimitMeasure/MeasureValue` = col_number(), - ResultMeasureValue = col_number()), - quote = "", delim = "\t") - unlink(doc) - } else { - retval <- read_delim(obs_url, - col_types = cols(`ActivityStartTime/Time` = col_character(), - `ActivityEndTime/Time` = col_character(), - USGSPCode = col_character(), - ResultCommentText=col_character(), - `ActivityDepthHeightMeasure/MeasureValue` = col_number(), - `DetectionQuantitationLimitMeasure/MeasureValue` = col_number(), - ResultMeasureValue = col_number()), - quote = "", delim = "\t") - } - } else { - stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url) + ) + obs_url <- temp } + + } else { + doc <- obs_url + } + + if(zip){ + doc <- unzip(doc) + retval <- suppressWarnings(read_delim(doc, + col_types = cols(`ActivityStartTime/Time` = col_character(), + `ActivityEndTime/Time` = col_character(), + USGSPCode = col_character(), + ResultCommentText=col_character(), + `ActivityDepthHeightMeasure/MeasureValue` = col_number(), + `DetectionQuantitationLimitMeasure/MeasureValue` = col_number(), + ResultMeasureValue = col_number(), + `WellDepthMeasure/MeasureValue` = col_number(), + `WellHoleDepthMeasure/MeasureValue` = col_number(), + `HUCEightDigitCode` = col_character()), + quote = "", delim = "\t")) + unlink(doc) + } else { + retval <- suppressWarnings(read_delim(doc, + col_types = cols(`ActivityStartTime/Time` = col_character(), + `ActivityEndTime/Time` = col_character(), + USGSPCode = col_character(), + ResultCommentText=col_character(), + `ActivityDepthHeightMeasure/MeasureValue` = col_number(), + `DetectionQuantitationLimitMeasure/MeasureValue` = col_number(), + ResultMeasureValue = col_number(), + `WellDepthMeasure/MeasureValue` = col_number(), + `WellHoleDepthMeasure/MeasureValue` = col_number(), + `HUCEightDigitCode` = col_character()), + quote = "", delim = "\t")) + } + if(!file.exists(obs_url)){ actualNumReturned <- nrow(retval) - if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned") - } else { + if(actualNumReturned != numToBeReturned & actualNumReturned != sitesToBeReturned){ + warning(totalReturned, " sample results were expected, ", actualNumReturned, " were returned") + } + } + + if(length(grep("ActivityStartTime",names(retval))) > 0){ - if(zip){ - doc <- unzip(obs_url) - retval <- read_delim(obs_url, - col_types = cols(`ActivityStartTime/Time` = col_character(), - `ActivityEndTime/Time` = col_character(), - USGSPCode = col_character(), - ResultCommentText=col_character()), - quote = "", delim = "\t") - unlink(doc) - } else { - retval <- read_delim(obs_url, - col_types = cols(`ActivityStartTime/Time` = col_character(), - `ActivityEndTime/Time` = col_character(), - USGSPCode = col_character(), - ResultCommentText=col_character()), - quote = "", delim = "\t") - } + + offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0), + code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","", NA), + stringsAsFactors = FALSE) + + retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime/TimeZoneCode"="code")) + names(retval)[names(retval) == "offset"] <- "timeZoneStart" + retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime/TimeZoneCode"="code")) + names(retval)[names(retval) == "offset"] <- "timeZoneEnd" + + dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate") + + retval <- suppressWarnings(mutate_each_(retval, ~as.Date(parse_date_time(., c("Ymd", "mdY"))), dateCols)) + + retval <- mutate_(retval, ActivityStartDateTime=~paste(ActivityStartDate, `ActivityStartTime/Time`)) + retval <- mutate_(retval, ActivityEndDateTime=~paste(ActivityEndDate, `ActivityEndTime/Time`)) + retval <- mutate_(retval, ActivityStartDateTime=~fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart) + retval <- mutate_(retval, ActivityEndDateTime=~fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart) + + retval <- select_(retval, ~-timeZoneEnd, ~-timeZoneStart) } - - offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0), - code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","", NA), - stringsAsFactors = FALSE) - - retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime/TimeZoneCode"="code")) - names(retval)[names(retval) == "offset"] <- "timeZoneStart" - retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime/TimeZoneCode"="code")) - names(retval)[names(retval) == "offset"] <- "timeZoneEnd" - - dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate") - - retval <- suppressWarnings(mutate_each_(retval, ~as.Date(parse_date_time(., c("Ymd", "mdY"))), dateCols)) - - retval <- mutate_(retval, ActivityStartDateTime=~paste(ActivityStartDate, `ActivityStartTime/Time`)) - retval <- mutate_(retval, ActivityEndDateTime=~paste(ActivityEndDate, `ActivityEndTime/Time`)) - - retval <- mutate_(retval, ActivityStartDateTime=~fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart) - retval <- mutate_(retval, ActivityEndDateTime=~fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart) - - retval <- select_(retval, ~-timeZoneEnd, ~-timeZoneStart) names(retval)[grep("/",names(retval))] <- gsub("/",".",names(retval)[grep("/",names(retval))]) return(retval) diff --git a/R/whatWQPsites.R b/R/whatWQPsites.R index ab97431a..b7738261 100644 --- a/R/whatWQPsites.R +++ b/R/whatWQPsites.R @@ -98,41 +98,10 @@ whatWQPsites <- function(...){ urlCall, "&mimeType=tsv&sorted=no",sep = "") - doc <- getWebServiceData(urlCall) - headerInfo <- attr(doc, "headerInfo") - - numToBeReturned <- as.numeric(headerInfo["Total-Site-Count"]) + retval <- importWQP(urlCall) + retval$queryTime <- Sys.time() + + return(retval) - if (!is.na(numToBeReturned) & numToBeReturned != 0){ - - retval <- read.delim(textConnection(doc), header = TRUE, - dec=".", sep='\t', quote="", - colClasses=c('character'), - fill = TRUE) - actualNumReturned <- nrow(retval) - - if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sites were expected, ", actualNumReturned, " were returned") - - if("LatitudeMeasure" %in% names(retval)){ - retval$LatitudeMeasure <- as.numeric(retval$LatitudeMeasure) - } - - if("LongitudeMeasure" %in% names(retval)){ - retval$LongitudeMeasure <- as.numeric(retval$LongitudeMeasure) - } - - retval$queryTime <- Sys.time() - - return(retval) - - } else { - if(headerInfo['Total-Site-Count'] == "0"){ - warning("No data returned") - } - - for(i in grep("Warning",names(headerInfo))){ - warning(headerInfo[i]) - } - } } diff --git a/man/getWebServiceData.Rd b/man/getWebServiceData.Rd index fba5749b..9d049704 100644 --- a/man/getWebServiceData.Rd +++ b/man/getWebServiceData.Rd @@ -4,10 +4,12 @@ \alias{getWebServiceData} \title{Function to return data from web services} \usage{ -getWebServiceData(obs_url) +getWebServiceData(obs_url, ...) } \arguments{ \item{obs_url}{character containing the url for the retrieval} + +\item{\dots}{information to pass to header request} } \value{ raw data from web services From cb6ffa5808b22d7c0e1ede681f1800560fd0ceb5 Mon Sep 17 00:00:00 2001 From: Laura DeCicco Date: Fri, 4 Dec 2015 13:17:02 -0600 Subject: [PATCH 2/2] Use getWebService with WQP. Still not using for zips though --- R/importWQP.R | 34 ++++++++++++++++++---------------- R/importWaterML1.r | 2 +- R/whatNWISsites.R | 2 +- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/R/importWQP.R b/R/importWQP.R index 10a5a134..98ef7e26 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -53,20 +53,6 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ if(!file.exists(obs_url)){ - doc <- getWebServiceData(obs_url) - headerInfo <- attr(doc, "headerInfo") - - numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"]) - sitesToBeReturned <- as.numeric(headerInfo["Total-Site-Count"]) - - totalReturned <- sum(numToBeReturned, sitesToBeReturned,na.rm = TRUE) - - if(is.na(totalReturned) | totalReturned == 0){ - for(i in grep("Warning",names(headerInfo))){ - warning(headerInfo[i]) - } - return(data.frame()) - } if(zip){ temp <- tempfile() @@ -79,7 +65,23 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ stop(e, "with url:", obs_url) } ) - obs_url <- temp + doc <- temp + + } else { + doc <- getWebServiceData(obs_url) + headerInfo <- attr(doc, "headerInfo") + + numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"]) + sitesToBeReturned <- as.numeric(headerInfo["Total-Site-Count"]) + + totalReturned <- sum(numToBeReturned, sitesToBeReturned,na.rm = TRUE) + + if(is.na(totalReturned) | totalReturned == 0){ + for(i in grep("Warning",names(headerInfo))){ + warning(headerInfo[i]) + } + return(data.frame()) + } } } else { @@ -116,7 +118,7 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ quote = "", delim = "\t")) } - if(!file.exists(obs_url)){ + if(!file.exists(obs_url) & !zip){ actualNumReturned <- nrow(retval) if(actualNumReturned != numToBeReturned & actualNumReturned != sitesToBeReturned){ diff --git a/R/importWaterML1.r b/R/importWaterML1.r index 1765808f..8faa75d7 100644 --- a/R/importWaterML1.r +++ b/R/importWaterML1.r @@ -100,7 +100,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ if(file.exists(obs_url)){ rawData <- obs_url } else { - rawData <- getWebServiceData(obs_url) + rawData <- getWebServiceData(obs_url, encoding='gzip') } returnedDoc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE) diff --git a/R/whatNWISsites.R b/R/whatNWISsites.R index c7b864ae..79efc7c6 100644 --- a/R/whatNWISsites.R +++ b/R/whatNWISsites.R @@ -41,7 +41,7 @@ whatNWISsites <- function(...){ urlCall <- drURL('waterservices',Access=pkg.env$access, format="mapper", arg.list = values) - rawData <- getWebServiceData(urlCall) + rawData <- getWebServiceData(urlCall, encoding='gzip') doc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE)