-
Notifications
You must be signed in to change notification settings - Fork 0
/
env-processing.txt
257 lines (167 loc) · 9.29 KB
/
env-processing.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# Valid data periods for TMS sensors #
## PacoEzpela ##
# PacoEzpela settings.
SELECTED_TMS <- 94252896
ts_start <- "2023-03-23 14:00:00"
# Custom a/b/c coefficients; otherwise, "sandy loam B" would be the closest.
SOIL_TYPE <- list(
  a = -2.7822E-08,
  b = 0.000380321,
  c = -0.282628151
)
PLACE <- "PacoEzpela"
Código R para filtrar datos inválidos:
```{r}
# Blank out the readings of each series over its invalid period.
# Rows are kept; only the measurement columns listed here are set to NA.
measure_cols <- c("vwc", "soil.temp", "surface.temp", "air.temp")

# Series 94252898: everything from 2023-06-18 04:15:00 onwards.
series1 <- db.env$series == "94252898"
interval1.1 <- db.env$ts >= "2023-06-18 04:15:00"
invalid1 <- series1 & interval1.1
db.env[invalid1, measure_cols] <- NA

# Series 94252899: everything from 2023-08-27 20:45:00 onwards.
series2 <- db.env$series == "94252899"
interval2.1 <- db.env$ts >= "2023-08-27 20:45:00"
invalid2 <- series2 & interval2.1
db.env[invalid2, measure_cols] <- NA
```
## Miedes ##
# Miedes settings.
# Sensor 94231940 has a bit of missing data from 6 to 26 Mar 2024.
SELECTED_TMS <- c(94231935, 94231940)
ts_start <- "2022-03-10 13:00:00"
SOIL_TYPE <- "sandy loam A"
PLACE <- "Miedes"
# series 94231939. Cuatro periodos inválidos:
1) De Jun a Ago 2022
2) sept 2022 a Nov 2022
3) jul23 a 26sept23 a las 18:00
4) 14jan24 20:15 al final
# series 94231938
1) No está nada claro donde empieza, pero antes de que empiece a llegar a hacer 0% VWC hay un momento con muchos outliers. Cojo como comienzo justo antes de que empiecen los outliers: 2022-06-11T20:15:00Z hasta el 2022-11-24T11:30:00Z. -> Enmedio hay un periodo de 20 días que sí es válido. Lo rescato, de modo que la cosa queda así:
1.1) del 2022-06-11T20:15:00Z al 2022-08-19T10:45:00Z
1.2) del 2022-09-08T22:15:00Z al 2022-11-24T11:30:00Z
2) del 2023-09-02 03:45:00 al 2023-09-26T15:30:00Z
# series 94231936
1) Antes del 2023-09-02T20:45:00Z hay un periodo claramente inválido. Es complicado determinar el comienzo. Me quedo con 2023-07-11T19:30:00Z como principio del periodo inválido, ya que ahí empieza a ir más para abajo que otros sensores análogos y tiene un pequeño salto hacia abajo, pero aún podría ser que esos datos fueran todavía válidos.
# series 94231940
- Unos pocos datos inválidos el 24 de dic 2023, desde las 00:45 hasta las 2:30
- Se pierde en el último mes :(
Datos inválidos a partir del 2024-03-09T18:15:00Z
Código R:
```{r}
# Drop every record logged before the start timestamp (data prior to
# installation). head() is called before and after the trim so the effect
# can be inspected.
ts_start <- "2022-03-10 13:00:00"
head(db.env)
rows_to_keep <- which(db.env$ts >= ts_start)
db.env <- db.env[rows_to_keep, ]
head(db.env)
```
```{r}
# Then we clear the invalid data for each series: readings that fall inside
# the intervals below are replaced by NA (the rows themselves are kept).
measure_cols <- c("vwc", "soil.temp", "surface.temp", "air.temp")

# --- series 94231939 ---
series1 <- db.env$series == "94231939"
# first interval: 2022-06-28 22:45:00 to 2022-08-19 10:45:00, both ends included
interval1.1 <- db.env$ts >= "2022-06-28 22:45:00" &
  db.env$ts <= "2022-08-19 10:45:00"
# dplyr way of selecting such a window (adjust to corresponding series and interval):
#db.2 = db.env %>% filter((series == "94231939") & between(ts, ymd_hms("2022-05-14 21:00:00", tz = "Europe/Madrid"), ymd_hms("2022-05-19 11:45:00", tz = "Europe/Madrid") ))
# second interval: strictly between 2022-09-08 22:15:00 and 2022-11-18 10:15:00
interval1.2 <- db.env$ts > "2022-09-08 22:15:00" &
  db.env$ts < "2022-11-18 10:15:00"
# third interval: strictly between 2023-07-28 08:15:00 and 2023-09-26 18:00:00
interval1.3 <- db.env$ts > "2023-07-28 08:15:00" &
  db.env$ts < "2023-09-26 18:00:00"
# fourth interval: everything after 2024-01-14 08:15:00
# NOTE(review): the written notes for this series give 14 Jan 2024 20:15 as
# the start of this period - confirm which time is the intended one.
interval1.4 <- db.env$ts > "2024-01-14 08:15:00"
invalid1 <- interval1.1 | interval1.2 | interval1.3 | interval1.4
db.env[series1 & invalid1, measure_cols] <- NA

# --- series 94231938 ---
series2 <- db.env$series == "94231938"
# first invalid period, split in two because of a valid stretch in between
interval2.1 <- db.env$ts >= "2022-06-11 20:15:00" &
  db.env$ts <= "2022-08-19 10:45:00"
interval2.2 <- db.env$ts >= "2022-09-08 22:15:00" &
  db.env$ts <= "2022-11-24 11:30:00"
# second invalid period: from 2023-09-02 03:45:00 up to (not including) 2023-09-26 15:30:00
interval2.3 <- db.env$ts >= "2023-09-02 03:45:00" &
  db.env$ts < "2023-09-26 15:30:00"
invalid2 <- interval2.1 | interval2.2 | interval2.3
db.env[series2 & invalid2, measure_cols] <- NA

# --- series 94231936 ---
series3 <- db.env$series == "94231936"
# first interval: from 2023-07-11 19:30:00 up to (not including) 2023-09-13 14:45:00
interval3.1 <- db.env$ts >= "2023-07-11 19:30:00" &
  db.env$ts < "2023-09-13 14:45:00"
# second interval: everything after 2024-03-19 21:15:00
interval3.2 <- db.env$ts > "2024-03-19 21:15:00"
invalid3 <- interval3.1 | interval3.2
db.env[series3 & invalid3, measure_cols] <- NA

# --- series 94231940 ---
series4 <- db.env$series == "94231940"
# short window on 2023-12-24, strictly between 00:45:00 and 02:30:00
interval4.1 <- db.env$ts > "2023-12-24 00:45:00" &
  db.env$ts < "2023-12-24 02:30:00"
# everything after 2024-03-09 18:15:00
interval4.2 <- db.env$ts > "2024-03-09 18:15:00"
invalid4 <- interval4.1 | interval4.2
db.env[series4 & invalid4, measure_cols] <- NA
```
## Peñaflor ##
! En los nuevos datos (hasta marzo24) faltan los datos de dos sensores, con lo cual solo tenemos datos de 3 sensores que lleguen hasta marzo24 (94231947, 94231949 y 94231950).
! 94231947 tiene muchos huecos sin datos a partir de diciembre23
! 94231949 quedó fuera del suelo y no tiene datos desde 4jul23
# Peñaflor settings.
SELECTED_TMS <- 94231950
SOIL_TYPE <- "sandy loam B"
ts_start <- "2022-03-15 12:30:00"
```{r}
# Replace the readings of each series by NA over its invalid intervals
# (the rows themselves are kept).
measure_cols <- c("vwc", "soil.temp", "surface.temp", "air.temp")

# --- series 94231949 ---
series1 <- db.env$series == "94231949"
# first interval: 2022-04-08 20:30:00 to 2022-05-04 14:00:00, both ends included
interval1.1 <- db.env$ts >= "2022-04-08 20:30:00" &
  db.env$ts <= "2022-05-04 14:00:00"
# second interval: from 2022-09-11 09:45:00 up to (not including) 2022-11-24 14:45:00
interval1.2 <- db.env$ts >= "2022-09-11 09:45:00" &
  db.env$ts < "2022-11-24 14:45:00"
# third interval: everything after 2023-09-30 20:45:00
interval1.3 <- db.env$ts > "2023-09-30 20:45:00"
invalid1 <- interval1.1 | interval1.2 | interval1.3
db.env[series1 & invalid1, measure_cols] <- NA

# --- series 94231942 ---
series2 <- db.env$series == "94231942"
# interval 0: prior to installation
interval2.0 <- db.env$ts < "2022-03-29 10:15:00"
# interval 1: 2022-06-29 20:45:00 to 2022-09-16 11:00:00, both ends included
interval2.1 <- db.env$ts >= "2022-06-29 20:45:00" &
  db.env$ts <= "2022-09-16 11:00:00"
invalid2 <- interval2.0 | interval2.1
db.env[series2 & invalid2, measure_cols] <- NA

# --- series 94231947 ---
series3 <- db.env$series == "94231947"
# single interval: from 2023-07-04 09:30:00 up to (not including) 2023-09-27 15:00:00
# NOTE(review): the written notes attribute the loss of data from 4 Jul 2023
# to sensor 94231949, not 94231947 - confirm the serial numbers.
interval3.1 <- db.env$ts >= "2023-07-04 09:30:00" &
  db.env$ts < "2023-09-27 15:00:00"
db.env[series3 & interval3.1, measure_cols] <- NA
```
## Corbalan ##
SELECTED_TMS <- c(94231943)
SOIL_TYPE = list(a=-1.7567E-08, b = 0.00027249, c = -0.180617717) # o (menos preciso): SOIL_TYPE = "sandy loam B"
ts_start = "2022-03-28 14:15:00"
Los datos empiezan a partir del 2022-03-28T13:30:00Z (antes es 0)
No son válidos:
- 94231944: un lapso en mayo-22 -> del 2022-05-14T21:00:00Z al 2022-05-19T11:45:00Z
- 94231944: desde sept-22 a nov-22 -> del 2022-09-09T21:30:00Z al 2022-11-16T13:45:00Z
- 94231945: desde final jul-23 a final sept-23 -> del 2023-07-26T08:45:00Z al 2023-09-28T13:30:00Z
- 94231944: pcpios ago-23 a final sept-23 -> del 2023-08-07T08:15:00Z al 2023-09-28T14:00:00Z
- 94231931: del 2023-08-23T08:00:00Z al 2023-09-28T14:00:00Z
R code:
First, we remove all the data prior to installation:
```{r}
# Keep only the records logged at or after the start timestamp; head() is
# called before and after the trim so the effect can be inspected.
ts_start <- "2022-03-28 14:15:00"
head(db.env)
rows_to_keep <- which(db.env$ts >= ts_start)
db.env <- db.env[rows_to_keep, ]
head(db.env)
```
Then, we set NA when there's no valid data:
```r
# Then we clear the invalid data for each series: readings inside the
# intervals below are replaced by NA (the rows themselves are kept).
measure_cols <- c("vwc", "soil.temp", "surface.temp", "air.temp")

# --- series 94231944 ---
series1 <- db.env$series == "94231944"
# first interval: 2022-05-14 20:00:00 to 2022-05-19 11:45:00, both ends included
# NOTE(review): the written notes give 2022-05-14T21:00:00Z as the start of
# this lapse - confirm whether the one-hour difference is intended.
interval1.1 <- db.env$ts >= "2022-05-14 20:00:00" &
  db.env$ts <= "2022-05-19 11:45:00"
# dplyr way of selecting such a window (adjust to corresponding series and interval):
#db.2 = db.env %>% filter((series == "94231939") & between(ts, ymd_hms("2022-05-14 21:00:00", tz = "Europe/Madrid"), ymd_hms("2022-05-19 11:45:00", tz = "Europe/Madrid") ))
# second interval: 2022-09-09 21:30:00 to 2022-11-16 13:45:00, both ends included
interval1.2 <- db.env$ts >= "2022-09-09 21:30:00" &
  db.env$ts <= "2022-11-16 13:45:00"
# third interval: 2023-08-06 22:15:00 to 2023-09-28 14:00:00, both ends included
# NOTE(review): the written notes give 2023-08-07T08:15:00Z as the start of
# this period - confirm which start is the intended one.
interval1.3 <- db.env$ts >= "2023-08-06 22:15:00" &
  db.env$ts <= "2023-09-28 14:00:00"
invalid1 <- interval1.1 | interval1.2 | interval1.3
db.env[series1 & invalid1, measure_cols] <- NA

# --- series 94231945 ---
series2 <- db.env$series == "94231945"
# single interval: 2023-07-26 08:45:00 to 2023-09-28 13:30:00, both ends included
interval2.1 <- db.env$ts >= "2023-07-26 08:45:00" &
  db.env$ts <= "2023-09-28 13:30:00"
db.env[series2 & interval2.1, measure_cols] <- NA

# --- series 94231931 ---
series3 <- db.env$series == "94231931"
# single interval: 2023-08-23 08:00:00 to 2023-09-28 14:00:00, both ends included
interval3.1 <- db.env$ts >= "2023-08-23 08:00:00" &
  db.env$ts <= "2023-09-28 14:00:00"
db.env[series3 & interval3.1, measure_cols] <- NA
```
## Valcuerna ##
R code:
# Valcuerna: replace the readings of each series by NA over its invalid
# periods (the rows themselves are kept).
#
# The masks are built from db.env$ts so that they are row-aligned with
# db.env, and the assignment uses db.env[rows, cols] <- NA, which is a no-op
# when no row matches. The form db.env[rows, ]$col <- NA stops with
# "replacement has 1 row, data has 0" in that case.
# NOTE(review): the other sites blank "soil.temp", "surface.temp" and
# "air.temp"; here the temperature column is "temp" - confirm that this is
# the right column name for this data set.
valcuerna_cols <- c("vwc", "temp")

# 94252893 is valid only from installation to mid-April and then from
# mid-June to the first days of July.
cond1 <- db.env$series == "94252893"
# On inspection, the first valid period lasts until "2023-04-13 21:00:00"
# and the second valid period goes from 2023-06-11 19:00:00 (very
# approximate) until 2023-07-05 03:30:00.
# First invalid period: from 13 April to 11 June.
interval <- (db.env$ts > "2023-04-13 21:00:00") &
  (db.env$ts <= "2023-06-11 19:00:00")
db.env[interval & cond1, valcuerna_cols] <- NA
# Second invalid period: from 2023-07-05 03:30:00 to the end.
db.env[(db.env$ts >= "2023-07-05 03:30:00") & cond1, valcuerna_cols] <- NA

# 94252894 is invalid from mid-August to the end.
cond2 <- db.env$series == "94252894"
# On inspection, the invalid period starts exactly at 2023-08-18 06:30:00
# and goes until the end.
db.env[(db.env$ts >= "2023-08-18 06:30:00") & cond2, valcuerna_cols] <- NA