-
Notifications
You must be signed in to change notification settings - Fork 0
/
dstc2_intent_example.py
94 lines (78 loc) · 2.58 KB
/
dstc2_intent_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
# Build the TensorFlow session that the Keras backend will use.
config = tf.ConfigProto()
# allow_growth: per the TensorFlow docs, GPU memory is then claimed on demand
# instead of being reserved up front.
config.gpu_options.allow_growth = True
# Only the device with index '0' is made visible to this process.
config.gpu_options.visible_device_list = '0'
# Register the configured session as the Keras/TensorFlow backend session.
set_session(tf.Session(config=config))
import pandas as pd
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
import os
from dstc2_intent_prediction_function import IntentRecognizerDSTC2
import re
# One fixed seed is handed to both NumPy's and TensorFlow's random generators.
SEED = 23
np.random.seed(SEED)
tf.set_random_seed(SEED)
def predictions2text(predictions, intent_labels=None, threshold=None):
    """Convert per-class scores into intent names for every sample.

    For each row, the names of all classes whose score is strictly greater
    than the threshold are selected. When no class passes the threshold, the
    single highest-scoring class is used instead.

    Args:
        predictions: Iterable of 1-D score rows (one score per intent).
        intent_labels: 1-D array of intent names aligned with the score
            columns. Defaults to the module-level ``intents``.
        threshold: Score a class has to exceed to be selected. Defaults to
            the module-level ``confident_threshold``.

    Returns:
        A numpy array with one entry per sample. When every sample selects
        the same number of intents this is a regular 2-D string array;
        otherwise it is a 1-D object array whose entries hold the selected
        names of each sample.
    """
    if intent_labels is None:
        intent_labels = intents
    else:
        intent_labels = np.asarray(intent_labels)
    if threshold is None:
        threshold = confident_threshold

    y = []
    for sample in predictions:
        sample = np.asarray(sample)
        to_add = np.where(sample > threshold)[0]
        if len(to_add) > 0:
            y.append(intent_labels[to_add])
        else:
            # Nothing is confident enough: fall back to the best guess.
            y.append([intent_labels[np.argmax(sample)]])

    try:
        return np.asarray(y)
    except ValueError:
        # Rows of different lengths cannot form a regular array; recent NumPy
        # versions refuse to build one implicitly, so request an object
        # array explicitly.
        return np.asarray(y, dtype=object)
def text2predictions(predictions, intent_labels=None):
    """Encode lists of intent names as multi-hot indicator rows.

    Args:
        predictions: Iterable of samples, each an iterable of intent names.
        intent_labels: Sequence of known intent names; the position of a name
            is its column in the output. Defaults to the module-level
            ``intents`` (with ``n_classes`` columns).

    Returns:
        Float array of shape ``(n_samples, n_classes)`` holding 1.0 in the
        columns of the intents present in a sample and 0.0 elsewhere. A name
        repeated inside one sample still yields 1.0, so the rows stay valid
        binary indicators.

    Raises:
        ValueError: If a sample contains a name missing from the label list.
    """
    if intent_labels is None:
        intent_labels = intents
        width = n_classes
    else:
        width = len(intent_labels)

    # One dictionary lookup per intent instead of scanning the label array.
    column_of = {name: column for column, name in enumerate(intent_labels)}

    rows = []
    for sample in predictions:
        curr = np.zeros(width)
        for intent in sample:
            try:
                column = column_of[intent]
            except KeyError:
                raise ValueError(
                    'intent %r is not in the list of known intents' % (intent,)
                ) from None
            curr[column] = 1.0
        rows.append(curr)

    if not rows:
        # Keep the result two-dimensional even when there are no samples.
        return np.zeros((0, width))
    return np.asarray(rows)
def proba2labels(predictions, threshold=None):
    """Binarize scores: 1 where a score is strictly above the threshold.

    Args:
        predictions: Array-like of scores.
        threshold: Cut-off value. Defaults to the module-level
            ``confident_threshold``.

    Returns:
        A new array with the shape and dtype of ``predictions`` holding 1
        where the score exceeds the threshold and 0 elsewhere. The input is
        left unmodified.
    """
    if threshold is None:
        threshold = confident_threshold
    predictions = np.asarray(predictions)
    # A single vectorized comparison replaces the row-by-row loop.
    return (predictions > threshold).astype(predictions.dtype)
def report(true, predicts):
    """Return one binary F-score per intent column.

    Args:
        true: 2-D array of reference labels, one column per intent.
        predicts: 2-D array of predicted labels with the same layout.

    Returns:
        List with as many entries as the module-level ``intents`` array has
        names; entry ``k`` is the F-score computed on column ``k`` with label
        1 treated as the positive class.
    """
    return [
        # Element 2 of the returned tuple is the F-score.
        precision_recall_fscore_support(
            true[:, column],
            predicts[:, column],
            average='binary',
            pos_label=1,
        )[2]
        for column in range(intents.shape[0])
    ]
# Directory the test CSV and the intent list are read from; it is also handed
# to the recognizer further down.
dpath = '/home/dilyara/data/data_files/dstc2/dstc2_intent_model'
test_data = pd.read_csv(os.path.join(dpath, 'dstc2_test.csv'))
print(test_data.head())

# intents.txt is read line by line, one intent name per line. The context
# manager closes the file even if reading fails.
with open(os.path.join(dpath, 'intents.txt'), 'r') as f:
    lines = f.readlines()
intents = np.array([line.rstrip() for line in lines])
n_classes = intents.shape[0]
# Split the space-separated 'intents' cell of every row into names, replacing
# any name that is not in the known intent list with 'unknown'.
test_intents = [
    [
        name if name in intents else 'unknown'
        for name in test_data.loc[row, 'intents'].split(' ')
    ]
    for row in range(test_data.shape[0])
]

# Raw request texts and the multi-hot encoding of their reference intents.
test_requests = test_data['text'].values
test_classes = text2predictions(test_intents)
# NOTE(review): text_size, embedding_size and kernel_sizes are not read
# anywhere else in this file; presumably they mirror the model's
# hyper-parameters -- confirm before removing.
text_size = 15
embedding_size = 300
kernel_sizes=[1,2,3]
# Global cut-off read by proba2labels() and predictions2text().
confident_threshold = 0.5
# The recognizer is constructed from the same directory as the test data.
recognizer = IntentRecognizerDSTC2(dpath=dpath)
test_predictions = recognizer.predict(requests=test_requests)
print(test_classes)
# Threshold the predictions, then score every intent column separately.
f1_test = report(test_classes, proba2labels(test_predictions))
print('Test F1-scores per intent: ', f1_test)
# Unweighted mean of the per-intent scores.
mean_f1 = np.mean(f1_test)
print('Test mean F1:', mean_f1)