-
Notifications
You must be signed in to change notification settings - Fork 0
/
kerastools.py
130 lines (102 loc) · 4.91 KB
/
kerastools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
'''
@author Vineeth_Bhaskara
'''
# For Python 2
# Tested on Keras 2.0.8 on Python 2
from sklearn.metrics import roc_auc_score, log_loss
from keras.callbacks import Callback
from keras.utils.data_utils import Sequence
class ModelCheckpointAUC(keras.callbacks.Callback):
'''
Save the model at the filepath specified. Pass val_data as a tuple of numpy data and labels, val_data=(val_x,val_y).
For FP/TP sensitive problems, we would like to Save/Monitor by the AUC score to save the model.
'''
def __init__(self, val_data, filepath=None, save_model=False, train_data=None, batch_size=256, logfile='./keras_log.log'):
self.logfile=logfile
self.batch_size=batch_size
self.validation_data = DataGen(val_data[0], val_data[1], batch_size=self.batch_size, shuffle=False,
onehot_y=True)
self.true_y = val_data[1]
if train_data is not None:
self.true_y_train = train_data[1]
self.train_data = DataGen(train_data[0], train_data[1], batch_size=self.batch_size, shuffle=False,
onehot_y=True)
self.save_model=False
if save_model:
self.filepath = filepath
self.save_model = True
with open(self.logfile, 'a') as f:
f.write('\n\n===================================\n')
f.write('TR_LOSS\tVAL_LOSS\tTR_AUC\tVAL_AUC\n')
def on_train_begin(self, logs={}):
self.aucs = [] # validation auc history
self.losses = []
self.vallloss = []
self.max_auc = -1 # absurd init value
self.trainlloss = []
self.trainaucs = []
def on_train_end(self, logs={}):
print 'Validation AUC History: ', self.aucs
print 'Validation Loss History: ', self.vallloss
print 'Train AUC History: ', self.trainaucs
print 'Train Loss History: ', self.trainlloss
return
def on_epoch_begin(self, epoch, logs={}):
return
def on_epoch_end(self, epoch, logs={}):
self.losses.append(logs.get('loss'))
y_pred = self.model.predict_generator(self.validation_data, len(self.validation_data))
auc_now_val = roc_auc_score(self.true_y, y_pred)
lloss_now_val = log_loss(self.true_y, y_pred)
self.aucs.append(auc_now_val)
self.vallloss.append(lloss_now_val)
y_pred_train = self.model.predict_generator(self.train_data, len(self.train_data))
auc_now_tr = roc_auc_score(self.true_y_train, y_pred_train)
lloss_now_tr = log_loss(self.true_y_train, y_pred_train)
self.trainaucs.append(auc_now_tr)
self.trainlloss.append(lloss_now_tr)
with open(self.logfile, 'a') as f:
# 'TR_LOSS\tVAL_LOSS\tTR_AUC\tVAL_AUC\n'
f.write('{}\t{}\t{}\t{}\n'.format(lloss_now_tr, lloss_now_val, auc_now_tr, auc_now_val))
print '\nEpoch Metrics: train_auc: {}, train_loss: {}, val_auc: {}, val_loss: {}'.format(str(round(auc_now_tr,4)), str(round(lloss_now_tr,4)), str(round(auc_now_val,4)), str(round(lloss_now_val,4))),'\n'
if self.save_model:
if self.max_auc < auc_now_val:
print('Saving model to '+self.filepath+' as better val auc.')
self.model.save(self.filepath, overwrite=True)
self.max_auc = auc_now_val
# also note the metric down
with open(self.filepath+'.metrics.txt', 'w') as f:
f.write('VAL AUC: ' + str(auc_now_val) + '\n')
f.write('VAL Loss: ' + str(lloss_now_val) + '\n')
f.write('Train AUC: ' + str(auc_now_tr) + '\n')
f.write('Train Loss: ' + str(lloss_now_tr) + '\n')
return
def on_batch_begin(self, batch, logs={}):
return
def on_batch_end(self, batch, logs={}):
return
class DataGen(Sequence):
'''
Just pass in numpy arrays of data and there you go! You get a generator that will return batches of data
w/wo shuffling. You may add more transformations here if you need.
'''
def __init__(self, x_set, y_set, batch_size, shuffle=True, onehot_y=True, seed=28081994):
np.random.seed(seed)
self.x, self.y = x_set, y_set
self.batch_size = batch_size
if shuffle:
tot = np.arange(self.x.shape[0])
np.random.shuffle(tot)
self.x = self.x[tot, :]
if onehot_y:
self.y = self.y[tot, :]
else:
self.y = self.y[tot]
def __len__(self):
return int(np.ceil(len(self.x) / float(self.batch_size)))
def on_epoch_end(self):
pass
def __getitem__(self, idx):
batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
return np.array(batch_x), np.array(batch_y)