This repository has been archived by the owner on Jan 17, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
KAWN_Sensors_DataCleanup.Rmd
400 lines (337 loc) · 17 KB
/
KAWN_Sensors_DataCleanup.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
---
title: "Data Load and Cleanup"
author: "Michelle Catherine Kelly"
output: html_notebook
editor_options:
chunk_output_type: console
---
Copyright (c) 2019 Michelle Catherine Kelly
License: MIT License
```{r setup, echo = FALSE, warning=FALSE, error=FALSE, message=FALSE}
knitr::opts_chunk$set(cache=TRUE)
library(XML)
library(lubridate)
library(ggplot2)
library(data.table)
library(dplyr)
library(tidyr)
library(dataRetrieval) # USGS loading
# NEON loading
# library(devtools)
# install_github("NEONScience/NEON-utilities/neonUtilities", dependencies = TRUE)
library(neonUtilities)
```
```{r dataLoad}
# masterFile
# concatenates local nitratax, miniDOT files into a single master .csv (if no master exists). If data is located on USGS database, pulls from USGS database and saves local master. If local master is present, script loads from file
# sitename = character vector with proper capitalization
# USGS = logical vector, if TRUE data should is pulled from USGS, if FALSE data is loaded from local files
masterFile <- function(sitename, USGS){
if(file.exists(paste("./SensorData_CompiledFiles/KAWN_SensorData_",
sitename, ".csv", sep = ""))){
# if master file exists, load from master file
dataframe <- read.csv(paste("./SensorData_CompiledFiles/KAWN_SensorData_",
sitename, ".csv", sep = ""),
header = TRUE)
}
if(!file.exists(paste("./SensorData_CompiledFiles/KAWN_SensorData_",
sitename, ".csv", sep = ""))){
# if master file doesn't exist, compile master file
if(USGS == FALSE){
# load in nitratax data
nitratax_XMLtoCSV <- function(filename){
xmlData <- XML::xmlParse(file = filename)
xmlData <- XML::xmlToDataFrame(xmlData, homogeneous = F, stringsAsFactors = F)
xmlData <- xmlData[-(1:6),] # taking out text at top of dataframe
for (count in 1:length(xmlData)){
names(xmlData)[count] <- as.character(xmlData[1,count])
}
xmlData <- xmlData[-1,] # taking out character rows
xmlData <- xmlData[,-1] # taking out first blank column
xmlData$` TIME ` <- lubridate::as_datetime(xmlData$` TIME `,
tz = "America/Chicago",
format = "%m/%d/%Y %H:%M:%S")
xmlData$` NITRATE CONC. ` <- as.numeric(xmlData$` NITRATE CONC. `)
xmlData$` ER ` <- as.numeric(xmlData$` ER `)
xmlData$` EM ` <- as.numeric(xmlData$` EM `)
xmlData <- data.frame(xmlData$` TIME `, xmlData$` NITRATE CONC. `)
names(xmlData) <- c("dateTime", "nitrateNitrite")
return(xmlData)
}
nitratax <- nitratax_XMLtoCSV(paste("./SensorData_RawFiles/NITRATAX_",
sitename, ".xml", sep = ""))
if(file.exists(paste("./SensorData_RawFiles/miniDOT_",
sitename, ".txt", sep = ""))){
# load in minidot data
DO <- read.table(file = paste("./SensorData_RawFiles/miniDOT_",
sitename, ".txt", sep = ""),
header = TRUE, sep = ",", skip = 8, stringsAsFactors = FALSE,
colClasses = c("numeric", "POSIXct", "POSIXct", "numeric",
"numeric", "numeric", "numeric", "numeric"),
col.names = c("unixTimestamp", "time_UTC", "time_CST",
"battery_V", "temp_C", "DO_mgL", "DO_sat", "Q"))
DO <- DO[c(-1, -2, -4, -8)] #drop unix timestamp, time UTC, battery volts, Q
DO$time_CST <- round_date(DO$time_CST, unit = "5 mins") # round time to nearest 5 min
# merge minidot with nitratax by datetime
dataframe <- merge(nitratax, DO, by.x = "dateTime", by.y = "time_CST", all = TRUE)
dataframe$dateTime <- ymd_hms(dataframe$dateTime)
# save master file
write.csv(dataframe, row.names = FALSE,
file = paste("./SensorData_CompiledFiles/KAWN_SensorData_",
sitename, ".csv", sep = ""))
} else{
# save master file with just nitratax
dataframe <- nitratax
write.csv(dataframe, row.names = FALSE,
file = paste("KAWN_SensorData_", sitename, ".csv", sep = ""))
}
} else{ # use USGS package to pull data from web, then save locally
start <- ymd_hms("2018-01-01 00:00:00")
end <- ymd_hms("2018-06-15 00:00:00")
if(sitename == "Desoto"){
siteNo <- "06892350"
pCodes <- c("99133", "00060", "00065", "00300", "00301","00010",
"00095", "00400", "32295")
data <- readNWISuv(siteNumbers = siteNo, parameterCd = pCodes,
startDate = start, endDate = end, tz = "America/Chicago")
# Rename columns from codes to readable names
data <- renameNWISColumns(data)
# Fix remaining names that are still parameter codes
names(data)[names(data)=="00301_Inst"] <- "DO_percentsat"
names(data)[names(data)=="99133_Inst"] <- "nitrateNitrite"
# Convert from American to SI units
# cubic foot per second to m3 per second
ft3s_m3s <- function(ft3s){
m3s <- ft3s*0.0283168
return(m3s)
}
# foot to meters
ft_m <- function(ft){
m <- 0.3048*ft
return(m)
}
data$Flow_Inst <- ft3s_m3s(data$Flow_Inst)
data$GH_Inst <- ft_m(data$GH_Inst)
# drop agency code, siteNo, various quality codes
dataframe <- data[c(-1,-2,-5,-7,-9,-11,-13,-15,-17,-19,-21,-22)]
# add column for UTC time
dataframe$dateTime_UTC <- dataframe$dateTime + hours(5)
}
# save the USGS data locally
write.csv(dataframe, row.names = FALSE,
file = paste("./SensorData_CompiledFiles/KAWN_SensorData_",
sitename, ".csv", sep = ""))
}
}
return(dataframe)
}
bowersock <- masterFile(sitename = "Bowersock", USGS = FALSE)
eric <- masterFile(sitename = "Eric", USGS = FALSE)
eric$dateTime_UTC <- ymd_hms(eric$dateTime_UTC, tz = "UTC")
steve <- masterFile(sitename = "Steve", USGS = FALSE)
steve$dateTime_UTC <- ymd_hms(steve$dateTime_UTC, tz = "UTC")
desoto <- masterFile(sitename = "Desoto", USGS = TRUE)
desoto$dateTime_UTC <- ymd_hms(desoto$dateTime_UTC, tz = "UTC")
```
```{r reachQ}
# avg.Q: logical, TRUE returns dataframe of mean daily Q at all sites, FALSE returns all measurements of Q at all sites
# load from file: logical, if TRUE loads appropriate dataframe from file
Q.dataframe <- function(avg.Q){
if(avg.Q == TRUE &
file.exists("./SensorData_CompiledFiles/KAWN_USGSDischarge_dailymeans.csv")){
Qdata <- read.csv("./SensorData_CompiledFiles/KAWN_USGSDischarge_dailymeans.csv",
header = TRUE)
}
if(avg.Q == FALSE &
file.exists("./SensorData_CompiledFiles/KAWN_USGSDischarge_instantaneous.csv")){
Qdata <- read.csv("./SensorData_CompiledFiles/KAWN_USGSDischarge_instantaneous.csv",
header = TRUE)
}
if(!file.exists("./SensorData_CompiledFiles/KAWN_USGSDischarge_dailymeans.csv") |
!file.exists("./SensorData_CompiledFiles/KAWN_USGSDischarge_instantaneous.csv")){
Q1.lawrence <- "06891080" # USGS identifier codes for gauges of interest
Q3.wakarusa <- "06891500"
Q4.stranger <- "06892000"
Q5.desoto <- "06892350"
sites <- c(Q1.lawrence, Q3.wakarusa, Q4.stranger, Q5.desoto)
start <- ymd_hms("2018-01-01 00:00:00") # start date of data
end <- ymd_hms("2018-06-15 00:00:00") # end date of data
#pull from USGS server
Qdata <- readNWISuv(siteNumbers = sites, parameterCd = "00060", # discharge
startDate = start, endDate = end, tz = "America/Chicago")
Qdata <- renameNWISColumns(Qdata)
ft3s_m3s <- function(ft3s){
m3s <- ft3s*0.0283168
return(m3s)
}
Qdata$Flow_Inst <- ft3s_m3s(Qdata$Flow_Inst) # convert to SI units
Qdata$dateTime <- ymd_hms(Qdata$dateTime) # report date in lubridate format
Qdata$Flow_Inst <- as.numeric(Qdata$Flow_Inst)
Qdata <- Qdata[c(-1,-5,-6)] # take out the columns that report data quality codes
if(avg.Q == TRUE){
Qdata <- # compute an average value for each day
Qdata %>%
group_by(site_no, dateTime = floor_date(dateTime, "day")) %>%
summarize(Flow_Inst = mean(Flow_Inst))
Qdata$dateTime <- floor_date(Qdata$dateTime, unit = "day")
}
Qdata <- spread(Qdata, site_no, Flow_Inst) # each site is in its own column
names(Qdata)[names(Qdata)=="06891080"] <- "Q1.lawrence"
names(Qdata)[names(Qdata)=="06891500"] <- "Q3.wakarusa"
names(Qdata)[names(Qdata)=="06892000"] <- "Q4.stranger"
names(Qdata)[names(Qdata)=="06892350"] <- "Q5.desoto"
# read in flow data from farmland report
Q2.Farmland <- read.csv("./SensorData_RawFiles/FarmlandDailyReport_MCKformatted.csv",
header = TRUE, stringsAsFactors = FALSE)
Q2.Farmland <- Q2.Farmland[c(-2:-8, -10:-11)]
Q2.Farmland$Date <- mdy_hms(paste(Q2.Farmland$Date, "00:00:00", sep = " "))
Q2.Farmland <-
Q2.Farmland %>%
group_by(Date = floor_date(Date, "day")) %>%
summarise(flowRate_Outfall001A_gpm = mean(flowRate_Outfall001A_gpm))
Q2.Farmland <- Q2.Farmland[Q2.Farmland$Date >= ymd("2018-01-01"),] # Jan 1 onwards
# convert from gpm to m3s
gpm_m3s <- function(gpm){
m3s <- gpm*0.000063090196
return(m3s)
}
Q2.Farmland$flowRate_Outfall001A_gpm <- gpm_m3s(Q2.Farmland$flowRate_Outfall001A_gpm)
# merge Q2 with rest of Qdata
Qdata <- merge(Qdata, Q2.Farmland, by.x = "dateTime", by.y = "Date", all = TRUE)
names(Qdata)[names(Qdata)=="flowRate_Outfall001A_gpm"] <- "Q2.Farmland"
# pumping stops on apr 1
Qdata$Q2.Farmland[Qdata$dateTime > ymd("2018-04-01")] <- NA
# balance equation: Q5.desoto = Q1.lawrence + Q2.farmland + Q3.wakarusa + Q4.stranger
Qdata$Q5.desoto <- -1*Qdata$Q5.desoto
Qdata$difference <- rowSums(Qdata[2:5], na.rm = TRUE) #difference = unaccounted for water, m3/s
Qdata <- Qdata[Qdata$difference != 0.00,]# if row sum = 0, drop row
if(avg.Q == "TRUE"){
write.csv(Qdata, "./SensorData_CompiledFiles/KAWN_USGSDischarge_dailymeans.csv",
row.names = FALSE)
} else{
write.csv(Qdata, "./SensorData_CompiledFiles/KAWN_USGSDischarge_instantaneous.csv",
row.names = FALSE)
}
}
return(Qdata)
}
Qdata.avg <- Q.dataframe(avg.Q = TRUE)
Qdata.instant <- Q.dataframe(avg.Q = FALSE) # comb through this - a lot of na's throwing off the numbers
# plot estimate vs actual
y <- rowSums(Qdata.avg[c(2,3,4,6)], na.rm = TRUE)
x <- -1*Qdata.avg$Q5.desoto
fit <- summary(lm(y ~ x))
r2 <- fit$adj.r.squared
pval <- fit$coefficients[2,4]
plot(x = -1*Qdata.avg$Q5.desoto, y = rowSums(Qdata.avg[c(2,3,4,6)], na.rm = TRUE),
ylab = "Estimated discharge at Desoto, m3/s",
xlab = "Gauged discharge at Desoto, m3/s",
main = "Basic water balance estimates")
abline(lm(y ~ x))
text(180, 225, labels = bquote(italic(R)^2 == .(format(r2, digits = 3))))
text(180, 205, labels = bquote(italic(p) == .(format(pval, digits = 3))))
plot(x = Qdata.avg$dateTime, y = Qdata.avg$difference,
ylab = "Gap in water balance, m3/s",
xlab = "dateTime")
```
percent deviation = ((downstream Q - upstream Q) / upstream Q) * 100%
where difference = sum (Lawrence gauge, Farmland flow, Wakarusa R. gage, Stranger Creek gage) - Desoto gage
positive values: upstream has greater Q
negative values: downstream (desoto gauge) has greater Q
- if precipitation was entering reach, this will happen
- gages on wakarusa and stranger are pretty far upstream, unaccounting for any gains below the gage (therefore, water balance will tend to predict positive percent deviations)
```{r dischargeEstimate}
# using daily averages for a discharge estimate
# at Eric (site 2)
# Q.eric <- Q1.lawrence + Q2.farmland
# Q.eric <- data.frame(dateTime = Qdata.avg$dateTime,
# Q.eric = rowSums(Qdata.avg[c(2,6)], na.rm = TRUE))
# eric <- merge(eric, Q.eric, by = "dateTime", all = TRUE)
# eric$Q.eric <- data.frame(dateTime = Qdata.avg$dateTime,
# Q.eric = rowSums(Qdata.avg[c(2,6)], na.rm = TRUE))
# at Steve (site 3) - this estimate would improve if we had some information about mud creek, but we dont
# Q.steve <- Q1.lawrence + Q2.farmland
# Q.steve <- data.frame(dateTime = Qdata.avg$dateTime,
# Q.steve = rowSums(Qdata.avg[c(2,6)], na.rm = TRUE))
# for now, just using instantaneous values for Q1.lawrence, as metabolism estimates need more frequent than daily avgs
dischargeEstimate <- function(dataframe, timeStart){
Q.estimate <- data.frame(dateTime = ymd_hms(Qdata.instant$dateTime, tz = "America/Chicago"),
Q = Qdata.instant$Q1.lawrence)
dataframe <- merge(dataframe, Q.estimate, by = "dateTime", all = FALSE)
dataframe <- dataframe[dataframe$dateTime >= timeStart,]
dataframe$dateTime_UTC <- dataframe$dateTime + hours(5)
return(dataframe)
}
steve <- dischargeEstimate(steve,
timeStart = ymd_hms("2018-01-30 17:00:00",
tz = "America/Chicago"))
eric <- dischargeEstimate(eric,
timeStart = ymd_hms("2018-02-19 13:00:00",
tz = "America/Chicago"))
```
```{r NEONpullCleanup, include=FALSE}
downloadNEONfiles <- FALSE
if (downloadNEONfiles == TRUE){
dir.create(paste0(getwd(), "/NEONfiles"))
######## PAR ############
getPackage(dpID = "DP1.00024.001", site_code = "UKFS",
year_month = "2018-02", package = "basic",
savepath = paste(getwd(), "NEONfiles", sep = "/")) # Feb at KU field station
getPackage(dpID = "DP1.00024.001", site_code = "UKFS",
year_month = "2018-03", package = "basic",
savepath = paste(getwd(), "NEONfiles", sep = "/")) # March at KU field station
getPackage(dpID = "DP1.00024.001", site_code = "UKFS",
year_month = "2018-04", package = "basic",
savepath = paste(getwd(), "NEONfiles", sep = "/")) # April at KU field station
getPackage(dpID = "DP1.00024.001", site_code = "UKFS",
year_month = "2018-05", package = "basic",
savepath = paste(getwd(), "NEONfiles", sep = "/")) # May at KU field station
stackByTable(dpID = "DP1.00024.001",
filepath = paste0(getwd(), "/NEONfiles"),
savepath = paste0(getwd(), "/NEONfiles"),
folder = TRUE)
######## Barometric pressure ############
getPackage(dpID = "DP1.00004.001", site_code = "UKFS",
year_month = "2018-02", package = "basic",
savepath = paste(getwd(), "NEONfiles", sep = "/")) # Feb at KU field station
getPackage(dpID = "DP1.00004.001", site_code = "UKFS",
year_month = "2018-03", package = "basic",
savepath = paste(getwd(), "NEONfiles", sep = "/")) # March
getPackage(dpID = "DP1.00004.001", site_code = "UKFS",
year_month = "2018-04", package = "basic",
savepath = paste(getwd(), "NEONfiles", sep = "/")) # April
getPackage(dpID = "DP1.00004.001", site_code = "UKFS",
year_month = "2018-05", package = "basic",
savepath = paste(getwd(), "NEONfiles", sep = "/")) # May
stackByTable(dpID = "DP1.00004.001",
filepath = paste0(getwd(), "/NEONfiles"),
savepath = paste0(getwd(), "/NEONfiles"),
folder = TRUE)
}
PAR <- read.csv(file = paste0(getwd(), "/NEONfiles", "/stackedFiles", "/PARPAR_1min.csv"),
header = TRUE)
# Take PAR measurements from the highest up sensor (60 m)
PAR <- subset(PAR, subset = PAR$verticalPosition == 60)
# Keep NEON default time zone
PAR$startDateTime <- ymd_hms(PAR$startDateTime)
PAR <- data.frame(
dateTime_UTC = PAR$startDateTime,
PAR = PAR$PARMean
)
eric <- merge(eric, PAR, by = "dateTime_UTC") # PAR units are micromoles per m^2 per second
steve <- merge(steve, PAR, by = "dateTime_UTC")
desoto <- merge(desoto, PAR, by = "dateTime_UTC")
#### Barometric pressure, units are kPa
BarPress <- read.csv(file = paste0(getwd(), "/NEONfiles", "/stackedFiles", "/BP_1min.csv"),
header = TRUE)
BarPress <- data.frame(
dateTime_UTC = ymd_hms(BarPress$startDateTime, tz = "UTC"),
barom.pressure = BarPress$staPresMean # units: kPa
)
eric <- merge(eric, BarPress, by = "dateTime_UTC")
steve <- merge(steve, BarPress, by = "dateTime_UTC")
desoto <- merge(desoto, BarPress, by = "dateTime_UTC")
write.csv(eric, row.names = FALSE, file = "KAWN_SensorData_Eric.csv")
write.csv(steve, row.names = FALSE, file = "KAWN_SensorData_Steve.csv")
write.csv(desoto, row.names = FALSE, file = "KAWN_SensorData_Desoto.csv")
```