-
Notifications
You must be signed in to change notification settings - Fork 0
/
svm_isflood.R
executable file
·146 lines (132 loc) · 5.74 KB
/
svm_isflood.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# imports
library(kernlab)
library(e1071) # SVM methodology
library(RColorBrewer) # customized coloring of plots
library(ggplot2)
library(caret)
library(pROC)
# data extraction
# Load the combined gauge data set and drop its first two columns.
uniondata <- read.csv("05054000+05082500_new_predict.csv")
uniondata <- uniondata[-1][-1]

# Flag predictors whose pairwise correlation exceeds the 0.75 cutoff.
# cor() requires every remaining column to be numeric.
correlationmatrix <- cor(uniondata)
highlycorr <- findCorrelation(correlationmatrix, cutoff = 0.75)
print(highlycorr)

# Remove the 15th entry from the drop list so that column is retained.
# NOTE(review): the position is hard-coded and therefore depends on the
# exact column order of the CSV -- confirm which variable it should keep.
highlycorr <- highlycorr[-15]

# findCorrelation() returns integer(0) when nothing exceeds the cutoff, and
# df[, -integer(0)] would then select zero columns instead of all of them.
# drop = FALSE keeps a data frame even if only one column survives.
if (length(highlycorr) > 0) {
  uniondata <- uniondata[, -highlycorr, drop = FALSE]
}

# Drop two further columns by position: the 7th, then the 2nd of the rest.
uniondata <- uniondata[-7][-2]

# Recode the outcome: flood type codes 1, 2 and 3 become 1, all others 0.
uniondata$Flood.type <- ifelse(
  uniondata$Flood.type == 1 |
    uniondata$Flood.type == 2 |
    uniondata$Flood.type == 3,
  1,
  0
)
# data split
# Coerce the outcome to a factor before partitioning.
uniondata$Flood.type <- as.factor(uniondata$Flood.type)

# Seed the RNG so the partition is reproducible, then let
# createDataPartition() pick 75% of the rows for training; the rows it did
# not pick form the test set.
set.seed(3033)
intrain <- createDataPartition(
  y = uniondata$Flood.type,
  p = 0.75,
  list = FALSE
)
training <- uniondata[intrain, ]
testing <- uniondata[-intrain, ]
#names(uniondata)
#svmdata = uniondata[-1]
#svmdata <- scale(svmdata)
#svmdata <- data.frame(svmdata)
#svmdata$Severity = factor(svmdata$Severity)
#x=model.matrix(svmdata$Severity~.,svmdata)
#y=svmdata$Severity
#svmtrain = sample(1:nrow(svmdata),0.5*nrow(svmdata))
#svmrest = svmdata[-svmtrain,]
#svmvalid = sample(1:nrow(svmrest), 0.5*nrow(svmrest))
#svmtest = -svmvalid
#svmtv = c(svmtrain, svmvalid)
#svmcolnames = c("TurbidityMin", "TurbidityMean", "TurbidityMax", "Discrage", "OxygenMax", "OxygenMin", "OxygenMean", "TempMax", "TempMin", "TempMean", "ConductMax", "ConductMin", "ConductMean", "pHMax", "pHMin", "pHMed", "Flood", "Severity", "Month")
#svmdata$Severity = ifelse(svmdata$Severity == 1, "Yes", "No")
# linear svm
# attach(uniondata)
# Resampling scheme: 10-fold cross-validation, repeated 3 times.
trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
set.seed(3233)

# Fit a linear-kernel SVM on all predictors, centring and scaling them.
svm_Linear <- train(
  Flood.type ~ .,
  data = training,
  method = "svmLinear",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneLength = 4
)

# Predict the held-out rows and tabulate them against the observed labels.
svmlinear.pre <- predict(svm_Linear, testing)
confusionMatrix(svmlinear.pre, testing$Flood.type) # 0.1271

# ROC curve built from the integer codes of the predicted/observed factors.
# NOTE(review): the predictor is a hard class label rather than a continuous
# score, so the curve has a single operating point -- consider using class
# probabilities or decision values instead.
y_pre <- as.numeric(svmlinear.pre)
y_acc <- as.numeric(testing$Flood.type)
# Pin the control/case codes and the comparison direction. pROC's default
# direction = "auto" derives the direction from the group medians, which can
# silently flip the comparison for a worse-than-chance model.
svmlinear.roc <- roc(y_acc, y_pre, levels = c(1, 2), direction = "<")
plot(
  svmlinear.roc,
  print.auc = TRUE, auc.polygon = TRUE,
  grid = c(0.1, 0.2), grid.col = c("green", "red"),
  max.auc.polygon = TRUE, auc.polygon.col = "skyblue", print.thres = TRUE
)

# Unscaled variable importance; FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
svmlinear.imp <- varImp(svm_Linear, scale = FALSE)
plot(svmlinear.imp, top = 10)
# reduce flood type, gage height and discharge
# uniondata = uniondata[-18]
# uniondata = uniondata[-8][-4]
# radial kernel
# Resampling scheme: 10-fold cross-validation, repeated 3 times.
trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
set.seed(3233)

# Fit a radial-kernel SVM on all predictors, centring and scaling them.
svm_Radial <- train(
  Flood.type ~ .,
  data = training,
  method = "svmRadial",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Predict the held-out rows and tabulate them against the observed labels.
svmradial.pre <- predict(svm_Radial, testing)
confusionMatrix(svmradial.pre, testing$Flood.type) # tpr = 0.5580
# Share of test rows whose predicted label equals the observed label.
acc <- mean(svmradial.pre == testing$Flood.type)

# ROC curve built from the integer codes of the predicted/observed factors.
# NOTE(review): the predictor is a hard class label rather than a continuous
# score, so the curve has a single operating point -- consider using class
# probabilities or decision values instead.
y_pre <- as.numeric(svmradial.pre)
y_acc <- as.numeric(testing$Flood.type)
# Pin the control/case codes and the comparison direction. pROC's default
# direction = "auto" derives the direction from the group medians, which can
# silently flip the comparison for a worse-than-chance model.
svmradial.roc <- roc(y_acc, y_pre, levels = c(1, 2), direction = "<")
plot(
  svmradial.roc,
  print.auc = TRUE, auc.polygon = TRUE,
  grid = c(0.1, 0.2), grid.col = c("green", "red"),
  max.auc.polygon = TRUE, auc.polygon.col = "skyblue", print.thres = TRUE
)
ggroc(svmradial.roc, colour = "red")

# Unscaled variable importance; FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
svmradial.imp <- varImp(svm_Radial, scale = FALSE)
plot(svmradial.imp, top = 10, main = "is_flood")
# plotting
# uniondata = read.csv("05054000+05082500_new_predict.csv")
# uniondata = uniondata[-1][-1]
# correlationmatrix = cor(uniondata)
# highlycorr = findCorrelation(correlationmatrix, cutoff = 0.75)
# print(highlycorr)
# uniondata = uniondata[,-highlycorr]
# set.seed(3033)
# uniondata$Severity = as.factor(uniondata$Severity)
# intrain <- createDataPartition(y = uniondata$Severity, p= 0.75, list = FALSE)
# training <- uniondata[intrain,]
# testing <- uniondata[-intrain,]
# trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
# set.seed(3233)
#
# svm_Linear <- train(Severity~., data = training, method = "svmLinear",
# trControl=trctrl,
# preProcess = c("center", "scale"),
# tuneLength = 4)
# svm_Radial <- train(Severity ~., data = training, method = "svmRadial",
# trControl=trctrl,
# preProcess = c("center", "scale"),
# tuneLength = 10)
# svmlinear.imp <- varImp(svm_Linear,scale=F)
# plot(svmlinear.imp,top=10)
# svmradial.imp <- varImp(svm_Radial,scale=F)
# plot(svmradial.imp,top=10)
# messy variables
# Start again from the raw CSV and drop its first two columns.
uniondata <- read.csv("05054000+05082500_new_predict.csv")
uniondata <- uniondata[-1][-1]

# Remove three columns by position: the 19th, then the 8th of what is left,
# then the 4th of what is left after that. No correlation filter is applied.
data3 <- uniondata[-19][-8][-4]

# Recode the outcome (codes 1, 2 and 3 become 1, all others 0) and store it
# as a factor.
data3$Flood.type <- ifelse(
  data3$Flood.type == 1 |
    data3$Flood.type == 2 |
    data3$Flood.type == 3,
  1,
  0
)
data3$Flood.type <- as.factor(data3$Flood.type)

# Seeded split: createDataPartition() picks 75% of the rows for training and
# the rows it did not pick form the test set.
set.seed(3033)
intrain <- createDataPartition(
  y = data3$Flood.type,
  p = 0.75,
  list = FALSE
)
training <- data3[intrain, ]
testing <- data3[-intrain, ]
# Resampling scheme: 10-fold cross-validation, repeated 3 times.
trctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)
set.seed(3233)

# linear kernel, fitted on the data3 training rows with centred and scaled
# predictors
svm_Linear <- train(
  Flood.type ~ .,
  data = training,
  method = "svmLinear",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Predict the held-out rows and tabulate them against the observed labels.
svmlinear.pre <- predict(svm_Linear, testing)
confusionMatrix(svmlinear.pre, testing$Flood.type) # tpr = 0.2486
# Resampling scheme: 10-fold cross-validation, repeated 3 times.
trctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)
set.seed(3233)

# nonlinear (radial) kernel, fitted on the data3 training rows with centred
# and scaled predictors
svm_Radial <- train(
  Flood.type ~ .,
  data = training,
  method = "svmRadial",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Predict the held-out rows and tabulate them against the observed labels.
svmradial.pre <- predict(svm_Radial, testing)
confusionMatrix(svmradial.pre, testing$Flood.type) # tpr = 0.7514