-
Notifications
You must be signed in to change notification settings - Fork 4
/
model.py
68 lines (50 loc) · 2.07 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn import metrics, model_selection
class Model:
    """XGBoost parameter holder with a repeated hold-out evaluation loop.

    The defaults configure a 9-class ``multi:softprob`` booster scored with
    ``mlogloss``. Any keyword argument passed to the constructor overrides
    or extends those defaults.
    """

    def __init__(self, **kwargs):
        """Store the booster parameters, letting *kwargs* override defaults."""
        params = {
            'eta': 0.02,
            'max_depth': 8,
            'min_child_weight': 1,
            'objective': 'multi:softprob',
            'eval_metric': 'mlogloss',
            'num_class': 9,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'silent': True,
        }
        params.update(kwargs)
        self.params = params

    def _seeded_params(self, seed):
        """Return a copy of ``self.params`` with ``'seed'`` set to *seed*."""
        # Copy so that a per-fit seed never leaks into the shared instance
        # configuration.
        params = dict(self.params)
        params['seed'] = seed
        return params

    def cv(self, train, y, test, fold=5, repeat=3):
        """Fit ``fold * repeat`` boosters on random splits and predict *test*.

        Despite the name this is not disjoint k-fold cross-validation: every
        fit draws an independent random 85/15 train/validation split of
        ``(train, y)``. The validation part drives early stopping and is
        scored with multiclass log loss, which is printed per fit, per
        repeat, and overall.

        Args:
            train: Training features, passed to ``train_test_split`` and
                ``xgb.DMatrix``.
            y: Training labels aligned with *train*.
            test: Features to predict with every fitted booster.
            fold: Number of fits per repeat.
            repeat: Number of repeats. Repeat ``r`` uses seeds
                ``100 * r + i`` for ``i`` in ``range(fold)``, so seeds are
                distinct across repeats only while ``fold <= 100``.

        Returns:
            ``np.asarray`` of the per-fit predictions on *test*, in fit
            order (presumably shaped ``(fold * repeat, n_test, num_class)``
            for ``multi:softprob`` -- confirm against the xgboost version
            in use).
        """
        preds = []
        print('Will do {fold}-fold cv for {repeat} times'.format(fold=fold, repeat=repeat))

        # The test matrix is identical for every fit, so build it once.
        dtest = xgb.DMatrix(test)

        all_scores = []
        for shift in range(0, 100 * repeat, 100):
            scores = []
            for i in range(fold):
                seed = i + shift
                params = self._seeded_params(seed)

                x1, x2, y1, y2 = model_selection.train_test_split(
                    train, y, test_size=0.15, random_state=seed)

                # Reuse the same DMatrix objects for training, the watchlist
                # and scoring instead of rebuilding them.
                dtrain = xgb.DMatrix(x1, y1)
                dvalid = xgb.DMatrix(x2, y2)
                watchlist = [(dtrain, 'train'), (dvalid, 'valid')]

                model = xgb.train(
                    params, dtrain, 1000,
                    watchlist, verbose_eval=100,
                    early_stopping_rounds=50)

                # NOTE(review): whether predict() uses the best iteration or
                # every trained round after early stopping depends on the
                # xgboost version -- confirm for the version in use.
                score = metrics.log_loss(
                    y2, model.predict(dvalid),
                    # Keep the label set in step with the configured class
                    # count rather than a hard-coded 9.
                    labels=list(range(params['num_class'])))
                print('\nlogloss for this fold:', score, '\n')
                scores.append(score)

                preds.append(model.predict(dtest))

            print('\nlogloss for this CV:', np.mean(scores), '\n')
            all_scores += scores

        print('average logloss for this model: ', np.mean(all_scores), '\n\n')
        return np.asarray(preds)