-
Notifications
You must be signed in to change notification settings - Fork 0
/
04-ksd_train.r
51 lines (42 loc) · 1.71 KB
/
04-ksd_train.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
source("libraries.R")
source("functions.R")
# outlier detection function
source("resources/external/maronna/KurtSDNew.R")
# NOTES:
# TODO: FIND OUT WHETHER BINARY/CATEGORICAL VARIABLES CAN BE INCLUDED (AS DUMMIES)
# if they can, include:
#   binary: endotrachflag, rrt, vent
#   categorical: admission_type, gender, ethnicity
#   ordinal categorical: gcseyes, gcsmotor, gcsverbal
# THIS IS NOT AN ALGORITHM TO BE TRAINED:
# it is only applied to the training data so that it is comparable with the autoencoder
# it is therefore computed:
#   for the whole dataset (to compare with the autoencoder)
#   for the whole training set (to feed into stacking)
# parameters --------------------------------------------------------------
# read data --------------------------------------------------------------
# Raw train/test tables; the sections below read only their `id_tot` column.
base_train <- readRDS(file = "data/working/x_train_raw.rds")
base_test <- readRDS(file = "data/working/x_test_raw.rds")
# Prepared train/test inputs that are handed to KurtSDNew().
x_train <- readRDS(file = "data/working/x_train_ksd.rds")
x_test <- readRDS(file = "data/working/x_test_ksd.rds")
# full data -------------------------------------------------------
# Stack train on top of test so a single KurtSDNew() run covers every observation.
x_full <- bind_rows(x_train, x_test)
# note: standardization is done inside the function
outk_full <- KurtSDNew(X = x_full)
# Pair each observation id with its outlyingness value and persist the table.
# Ids are concatenated train-first, the same order used to build x_full.
# NOTE(review): assumes `tl[[1]]` holds one value per row of x_full and that the
# raw and prepared tables share row order -- confirm against KurtSDNew.R.
out_ksd_full <- tibble(
  id_tot = c(base_train$id_tot, base_test$id_tot),
  out = outk_full$tl[[1]]
)
saveRDS(object = out_ksd_full, file = "data/working/outliers_full_ksd.rds")
# training data -------------------------------------------------------
# Same procedure restricted to the training set.
# note: standardization is done inside the function
outk_train <- KurtSDNew(X = x_train)
# Pair each training id with its outlyingness value and persist the table.
# NOTE(review): assumes `tl[[1]]` holds one value per row of x_train, in the
# same row order as base_train -- confirm against KurtSDNew.R.
out_ksd_train <- tibble(
  id_tot = c(base_train$id_tot),
  out = outk_train$tl[[1]]
)
saveRDS(object = out_ksd_train, file = "data/working/outliers_train_ksd.rds")