# IMPORTS
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.layers import Dense, BatchNormalization, Reshape, Activation
from tensorflow.keras.layers import Flatten, Embedding, Input, GRU, Bidirectional
from tensorflow.keras.layers import LSTM, SimpleRNNCell, RNN
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.initializers import Constant
import spacy
from sklearn.metrics import f1_score, roc_auc_score
import project_functions as pf
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# GENERAL
def pred_labels(model, X_test):
    """
    Function to make predictions for given X data and instead of
    returning raw probabilities, returns the predictions in label
    form: 0 = true news, 1 = fake news.
    ----------------------------------------------------------------------
    model = a trained model to predict with
    X_test = array of features you want to predict from
    """
    # Make predictions / get raw probabilities
    raw_preds = model.predict(X_test)
    # Convert probabilities into labels
    preds = (raw_preds > 0.5).astype(int).reshape(-1)
    return preds

def pred_text_as_labels(text, tokenizer,
                        model, maxlen = 500,
                        truncating = "post",
                        as_labels = True):
    """
    Function to make predictions for new texts/articles using a fitted tokenizer
    and a fitted model. Returns either label predictions (0/1) or probabilities
    for whether the articles are true/fake, depending on the as_labels arg.
    ----------------------------------------------------------------------
    text = list of strings, texts/articles you want to try and classify
    tokenizer = a fitted Keras Tokenizer() object
    model = a trained model to predict with
    maxlen = int, maximum length of sequences
    truncating = str, whether sequences should be truncated at the front ("pre")
                 or back ("post")
    as_labels = bool, if True will return model predictions in label form- 0 = true and
                1 = fake. If False, gives the raw probabilities.
    """
    # Convert text to sequences
    seqs = tokenizer.texts_to_sequences(text)
    # Pad/trim sequences
    padded_seqs = pad_sequences(seqs, maxlen = maxlen, truncating = truncating)
    # Make predictions
    raw_preds = model.predict(padded_seqs)
    if as_labels:
        # Convert from probabilities into labels
        preds = (raw_preds > 0.5).astype(int).reshape(-1)
        return preds
    else:
        return raw_preds
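
# Example usage (added sketch, not part of the original module): shows the
# intended call pattern for pred_text_as_labels with a toy tokenizer and an
# untrained model, so the predicted labels are only illustrative. mlp() is
# defined further down in this module.
def _demo_pred_text_as_labels():
    from tensorflow.keras.preprocessing.text import Tokenizer
    # Fit a tiny tokenizer on two hypothetical articles
    articles = ["the economy grew last quarter",
                "aliens secretly run the senate"]
    tok = Tokenizer()
    tok.fit_on_texts(articles)
    # Build an (untrained) MLP without an embedding; input shape is (maxlen,)
    demo_model = mlp(maxlen = 20)
    # Classify a new text: returns an array of 0/1 labels
    print(pred_text_as_labels(["a new article to check"], tok, demo_model,
                              maxlen = 20))
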
def get_test_metrics(model, X_test, y_test, all_results,
                     history,
                     name = "mlp",
                     embedding = None,
                     regularize = False,
                     batch_normalize = False,
                     verbose = 0):
    """
    Evaluates a fitted model against test data in terms of accuracy,
    ROC AUC score and F1 score. Must be called after the embedding layer,
    regularize toggle and batch_normalize toggle have all been instantiated.
    Returns a df of model name, specs and test metrics.
    ----------------------------------------------------------------------
    model = a trained model to predict with
    X_test = array of padded text sequences
    y_test = array of int 0/1 labels for true/fake news
    all_results = pd.DataFrame, either empty or containing rows of
                  other models' results
    history = Keras history object
    name = str, the name of the architecture being used
    embedding = str/None, name of embedding layer used in embedding dict
    regularize = bool, whether or not model was regularized
    batch_normalize = bool, whether or not model was batch normalized
    verbose = int, controls messaging of model.evaluate
    """
    # Get number of epochs run for
    n_epochs = len(history.history["loss"])
    # Get test accuracy
    test_acc = model.evaluate(X_test, y_test, verbose = verbose)[1]
    # Get raw predictions / probabilities for ROC AUC
    probs = model.predict(X_test)
    test_roc_auc = roc_auc_score(y_test, probs)
    # Get label predictions for F1
    preds = pred_labels(model, X_test)
    test_f1 = f1_score(y_test, preds)
    # Save results
    results = pd.DataFrame({"name": name,
                            "embedding": embedding,
                            "regularize": regularize,
                            "batch_normalize": batch_normalize,
                            "accuracy": test_acc,
                            "roc_auc": test_roc_auc, "f1": test_f1,
                            "epochs": n_epochs}, index = [0])
    # Store results with others
    all_results = pd.concat([all_results, results])
    return all_results

def fit_and_save(X_train, y_train, model,
                 name = "mlp",
                 embedding = None,
                 regularize = False,
                 batch_normalize = False,
                 save_model = False,
                 save_history = True,
                 **kwargs):
    """
    Fits a Keras model and, if desired, saves both the model and its history
    into folders following the naming convention models/name/
    embedding_{additional_layers}. Returns a Keras history object.
    Designed to work in the analysis NB once regularize and batch_normalize
    have been instantiated.
    ----------------------------------------------------------------------
    X_train = array of padded text sequences
    y_train = array of int 0/1 labels for true/fake news
    model = a compiled model to fit
    name = str, the name of the architecture being used
    embedding = str/None, name of embedding layer used in embedding dict
    regularize = bool, whether or not model was regularized
    batch_normalize = bool, whether or not model was batch normalized
    save_model = bool, whether or not to save the fitted model
    save_history = bool, whether or not to save the training history
    **kwargs = fit_hp dict of args that Keras model.fit can take
    """
    history = model.fit(X_train, y_train, **kwargs)
    # Make filepath
    save_path = str(embedding)
    if regularize:
        save_path += "_reg"
    if batch_normalize:
        save_path += "_bn"
    # Save model if desired
    if save_model:
        # Create folder if it doesn't exist
        if not os.path.exists(f"models/{name}"):
            os.makedirs(f"models/{name}")
        model.save(f"models/{name}/{save_path}")
    # Save history if desired
    if save_history:
        # Make into df
        hist_df = pd.DataFrame(history.history)
        # Create folder if it doesn't exist
        if not os.path.exists(f"histories/{name}"):
            os.makedirs(f"histories/{name}")
        hist_df.to_csv(f"histories/{name}/{save_path}.csv", index = False)
    return history
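
# Workflow sketch (added for illustration): how fit_and_save and
# get_test_metrics are meant to chain together. The data is random, training
# runs for a single epoch and saving is switched off, so this only
# demonstrates the call pattern, not meaningful scores.
def _demo_fit_and_evaluate():
    rng = np.random.default_rng(42)
    X = rng.integers(1, 100, size = (80, 500)).astype("float32")
    y = rng.integers(0, 2, size = 80)
    model = mlp()
    history = fit_and_save(X[:64], y[:64], model, name = "mlp",
                           save_model = False, save_history = False,
                           epochs = 1, verbose = 0)
    results = get_test_metrics(model, X[64:], y[64:],
                               all_results = pd.DataFrame(),
                               history = history, name = "mlp")
    print(results)
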
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# EMBEDDINGS
def spacy_embedding(tokenizer, maxlen = 500, show_progress = False):
    """Function to create a SpaCy embedding layer. Uses the en_core_web_sm pipeline,
    a small English-language pipeline appropriate for blogs, news and comments.
    This takes a while to run.
    ----------------------------------------------------------------------
    tokenizer = a fitted Keras Tokenizer() object
    maxlen = int, maximum length of sequences
    show_progress = bool, simple indicator to tell you how much progress with
                    the embedding you have made as %.
    """
    # Load the spacy pipeline
    # small English pipeline trained on written web text (blogs, news, comments)
    nlp = spacy.load("en_core_web_sm")
    # Get vocab size of tokenizer
    vocab_size = len(tokenizer.word_index) + 1
    # Get the number of embedding dimensions SpaCy uses
    embedding_dim = nlp("any_word").vector.shape[0]
    # Create a matrix to use in embedding layer
    embedding_matrix = np.zeros((vocab_size, embedding_dim))
    # Iterate through our vocabulary, mapping each word to its spacy embedding.
    # tokenizer.word_index maps word -> integer index (starting at 1), so we
    # use the tokenizer's own indices rather than enumerate() to keep matrix
    # rows aligned with the sequences the tokenizer produces.
    # This will take a while to run.
    for word, i in tokenizer.word_index.items():
        embedding_matrix[i] = nlp(word).vector
        # Show progress if desired
        if show_progress:
            if i % 10000 == 0 and i > 0:
                print(round(i * 100 / vocab_size, 3), "% complete")
    # Load the embedding matrix as the weights matrix for the embedding layer
    # Set trainable to False as the layer is already "learned"
    Embedding_layer = Embedding(
        vocab_size,
        embedding_dim,
        input_length = maxlen,
        embeddings_initializer = Constant(embedding_matrix),
        trainable = False,
        name = "spacy_embedding")
    return Embedding_layer

def glove_embedding(tokenizer,
                    filepath = "../glove/glove.6B.300d.txt",
                    maxlen = 500,
                    show_progress = False):
    """Function to create a GloVe embedding layer. Uses vectors trained on the
    Wikipedia 2014 and Gigaword 5 datasets; they are English-language and include
    news data, so they are appropriate for this task.
    ----------------------------------------------------------------------
    tokenizer = a fitted Keras Tokenizer() object
    filepath = str, path to the glove pre-trained vector file
    maxlen = int, maximum length of sequences
    show_progress = bool, simple indicator to tell you how much progress with
                    the embedding you have made as %.
    """
    # Create dict to store glove embeddings in- word:vector
    glove_embeddings = {}
    # Load the GloVe embeddings
    # trained on a combination of Wikipedia and news data (English language)
    with open(filepath, 'r', encoding = "utf-8") as f:
        for line in f:
            values = line.split()
            word = values[0]
            vector = np.asarray(values[1:], "float32")
            glove_embeddings[word] = vector
    # Get vocab size of tokenizer
    vocab_size = len(tokenizer.word_index) + 1
    # Get the number of embedding dimensions GloVe uses
    # (taken from the last vector parsed above)
    embedding_dim = vector.shape[0]
    # Create a matrix to use in embedding layer
    embedding_matrix = np.zeros((vocab_size, embedding_dim))
    # Iterate through our vocabulary, mapping each word to its GloVe embedding.
    # As in spacy_embedding, we use the tokenizer's own word -> index mapping
    # so that matrix rows line up with the tokenizer's sequences.
    for word, i in tokenizer.word_index.items():
        # Try to find the corresponding vector for the word, else None
        embedding_vector = glove_embeddings.get(word)
        # If the word exists, update the matrix with its vector
        # Words that couldn't be mapped are 0s in the matrix
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
        # Display progress if desired
        if show_progress:
            if (i % 10000 == 0 and i > 0):
                print(round(i * 100 / vocab_size, 3), "% complete")
    # Load the embedding matrix as the weights matrix for the embedding layer
    # Set trainable to False as the layer is already "learned"
    Embedding_layer = Embedding(
        vocab_size,
        embedding_dim,
        input_length = maxlen,
        embeddings_initializer = Constant(embedding_matrix),
        trainable = False,
        name = "glove_embedding")
    return Embedding_layer

def keras_embedding(tokenizer, embedding_dim = 256, maxlen = 500):
    """Function to create a custom, trainable Keras embedding layer.
    ----------------------------------------------------------------------
    tokenizer = a fitted Keras Tokenizer() object
    embedding_dim = int, number of embedding dimensions to learn
    maxlen = int, maximum length of sequences
    """
    # Get vocab size of tokenizer
    vocab_size = len(tokenizer.word_index) + 1
    # Unlike the SpaCy/GloVe layers, this embedding has no pre-trained
    # weights: it is left trainable so it is learned during model fitting
    Embedding_layer = Embedding(
        vocab_size,
        embedding_dim,
        input_length = maxlen,
        name = "keras_embedding")
    return Embedding_layer
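
# Example usage (added sketch): build the trainable Keras embedding from a toy
# tokenizer and plug it into one of the model constructors below. The texts
# here are hypothetical placeholders.
def _demo_keras_embedding():
    from tensorflow.keras.preprocessing.text import Tokenizer
    tok = Tokenizer()
    tok.fit_on_texts(["fake news travels fast", "true news checks sources"])
    emb = keras_embedding(tok, embedding_dim = 16, maxlen = 20)
    # The embedding layer carries the input shape, so mlp() adds no Input layer
    model = mlp(embedding = emb, maxlen = 20)
    model.summary()
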
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# MLP
def mlp(loss = "binary_crossentropy",
        optimizer = "adam",
        metrics = ["accuracy"], regularize = False,
        batch_normalize = False,
        embedding = None,
        maxlen = 500,
        hidden_dense_units = 256,
        dense_kernel_initializer = "he_uniform"):
    """
    Creates an MLP designed to be used with the text data only.
    Can be built with either Keras, SpaCy, GloVe or no embedding.
    Returns a compiled Keras model of 2 Dense layers (hidden_dense_units, 1). There are
    options to include elasticnet regularization and batch normalisation layers too.
    ----------------------------------------------------------------------
    loss = str, name of loss function to use
    optimizer = Keras optimizer, set to 'adam' but any optimizer can be passed
    metrics = list of Keras metrics to evaluate with
    regularize = bool, if True adds elasticnet/l1_l2 regularisation with
                 l1 = 0.01 and l2 = 0.01
    batch_normalize = bool, if True adds batch normalisation between the hidden Dense
                      and output layers.
    embedding = None/Keras embedding instance: the type of embedding to use (SpaCy,
                GloVe, Keras or none).
    maxlen = int, shape of input (length of sequences)
    hidden_dense_units = int, number of hidden units in the hidden dense layer.
    dense_kernel_initializer = str or keras.initializers object for the weights
                               of the Dense layers.
    """
    # Build model
    model = Sequential(name = "MLP")
    # Add embedding if desired
    if embedding:
        # Embedding contains input shape
        model.add(embedding)
        # Flatten embeddings
        model.add(Flatten())
    else:
        model.add(Input(shape = (maxlen, ), name = "Input"))
    # Elasticnet regularised model
    if regularize:
        model.add(Dense(hidden_dense_units, name = "Linear_Dense_Elasti",
                        kernel_regularizer = l1_l2(),
                        kernel_initializer = dense_kernel_initializer))
    # Baseline model
    else:
        model.add(Dense(hidden_dense_units, name = "Linear_Dense",
                        kernel_initializer = dense_kernel_initializer))
    # Batch normalised model
    if batch_normalize:
        model.add(BatchNormalization(name = "Batch_Norm1"))
    # Apply non-linear activation, specified in this way to be consistent
    # with the original paper
    model.add(Activation("relu", name = "ReLU_Activation"))
    # Output layer
    model.add(Dense(1, activation = "sigmoid", name = "Output",
                    kernel_initializer = dense_kernel_initializer))
    # Compile model
    model.compile(loss = loss, optimizer = optimizer,
                  metrics = metrics)
    return model
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# RNN
def bi_rnn(embedding = None, maxlen = 500,
           rnn_units = 32, rnn_kernel_initializer = "glorot_uniform",
           hidden_dense_units = 256, dense_kernel_initializer = "he_uniform",
           regularize = False, batch_normalize = False,
           loss = 'binary_crossentropy', optimizer = 'adam',
           metrics = ['accuracy']):
    """
    Instantiates a bidirectional RNN model used to detect fake news. The basic
    structure includes one of three possible embeddings (Keras trainable embedding,
    GloVe, and SpaCy), plus no embedding (the default). Afterwards, there is one
    bidirectional recurrent layer built from simple RNN cells.
    After the RNN layer there is a fully-connected layer to expand
    the interactions of the RNN output before the final output layer. A ReLU
    activation function is applied here.
    Finally, since the task is binary classification, a dense layer with sigmoid
    activation function is included.
    Summary of the architecture:
    Embedding - Bidirectional RNN 32 * 2 units - Dense layer 256 units with ReLU -
    Output dense layer
    Further modifications can be included, either a kernel regularizer
    or a batch normalization layer to improve generalization. Both
    will be applied either in or right after the hidden dense layer.
    Finally, this function compiles and returns the model.
    ----------------------------------------------------------------------
    embedding = Embedding layer object already loaded into memory
    maxlen = Maximum length of each padded sequence (interpreted as number of timesteps)
    rnn_units = int, number of hidden units in the recurrent computation of the RNN
    rnn_kernel_initializer = str or keras.initializers object for the weights
                             of the RNN layer. Default as specified in the keras function.
    hidden_dense_units = int, number of hidden units in the hidden dense layer
    dense_kernel_initializer = str or keras.initializers object for the weights
                               of the Dense layer. Default as specified in the keras function.
    regularize = bool, application of elasticnet to the weights of the hidden
                 dense layer through the use of the l1_l2() keras function,
                 with l1 = 0.01 and l2 = 0.01
    batch_normalize = bool, application of Batch Normalization after the hidden
                      dense layer
    loss = str or keras.losses object, specifies the loss function
    optimizer = str or keras.optimizers object
    metrics = list of str or keras.metrics objects, metrics to be calculated
    """
    model = Sequential(name = "RNN")
    # a) Embeddings: add the kind of embedding the user requires.
    # 1. Any embedding already built by the user (such as GloVe or Keras):
    if embedding:
        model.add(embedding)
    # 2. Default: no embedding. We specify the shape as
    # (number of timesteps, n_features). With only text, n_features = 1.
    # Batch_size is already inferred by TF while fitting.
    else:
        model.add(Input(shape = (maxlen, 1), name = "Input"))
    # b) RNN bidirectional layer
    # First we instantiate the basic rnn_cell, which is the basic component
    # of the RNN layer. Then, we create a Bidirectional RNN layer
    rnn_cell = SimpleRNNCell(rnn_units, kernel_initializer = rnn_kernel_initializer)
    model.add(Bidirectional(RNN(rnn_cell), name = "Bidirectional_RNN"))
    # c) Densely connected layer, where regularization can be applied
    if regularize:
        model.add(Dense(hidden_dense_units,
                        kernel_initializer = dense_kernel_initializer,
                        kernel_regularizer = l1_l2(),
                        name = "Regularized_hidden_dense"))
    else:
        model.add(Dense(hidden_dense_units,
                        kernel_initializer = dense_kernel_initializer,
                        name = "Hidden_dense"))
    # d) Batch normalization layer, added if specified:
    if batch_normalize:
        model.add(BatchNormalization(name = "Batch_normalization"))
    # Activation function of the dense layer, applied after batch normalization
    # in case this has been specified.
    model.add(Activation("relu", name = "ReLu_activation"))
    # e) Final Dense layer:
    model.add(Dense(1, activation = "sigmoid",
                    kernel_initializer = dense_kernel_initializer,
                    name = "Output"))
    # Compile the model with the user specifications:
    model.compile(loss = loss, optimizer = optimizer, metrics = metrics)
    return model
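
# Shape check (added sketch): with the defaults, the Bidirectional wrapper
# concatenates the forward and backward passes, so the recurrent block
# outputs rnn_units * 2 = 64 features per sequence, which summary() makes
# visible.
def _demo_bi_rnn_shapes():
    model = bi_rnn(maxlen = 100, rnn_units = 32)
    model.summary()
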
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# LSTM
def bi_lstm(embedding = None, maxlen = 500,
            rnn_units = 32, rnn_kernel_initializer = "glorot_uniform",
            hidden_dense_units = 256, dense_kernel_initializer = "he_uniform",
            regularize = False, batch_normalize = False,
            loss = 'binary_crossentropy', optimizer = 'adam',
            metrics = ['accuracy']):
    """
    Instantiates a bidirectional LSTM model used to detect fake news. The basic
    structure includes one of three possible embeddings (Keras trainable embedding,
    GloVe, and SpaCy), plus no embedding (the default). Afterwards, there is one
    bidirectional recurrent layer that relies on LSTM gates.
    After the LSTM layer there is a fully-connected layer to expand
    the interactions of the LSTM output before the final output layer. A ReLU
    activation function is applied here.
    Finally, since the task is binary classification, a dense layer with sigmoid
    activation function is included.
    Summary of the architecture:
    Embedding - Bidirectional LSTM 32 * 2 units - Dense layer 256 units with ReLU -
    Output dense layer
    Further modifications can be included, either a kernel regularizer
    or a batch normalization layer to improve generalization. Both
    will be applied either in or right after the hidden dense layer.
    Finally, this function compiles and returns the model.
    ----------------------------------------------------------------------
    embedding = Embedding layer object already loaded into memory
    maxlen = Maximum length of each padded sequence (interpreted as number of timesteps)
    rnn_units = int, number of hidden units in the recurrent computation of the LSTM
    rnn_kernel_initializer = str or keras.initializers object for the weights
                             of the LSTM layer. Default as specified in the keras function.
    hidden_dense_units = int, number of hidden units in the hidden dense layer
    dense_kernel_initializer = str or keras.initializers object for the weights
                               of the Dense layer. Default as specified in the keras function.
    regularize = bool, application of elasticnet to the weights of the hidden
                 dense layer through the use of the l1_l2() keras function,
                 with l1 = 0.01 and l2 = 0.01
    batch_normalize = bool, application of Batch Normalization after the hidden
                      dense layer
    loss = str or keras.losses object, specifies the loss function
    optimizer = str or keras.optimizers object
    metrics = list of str or keras.metrics objects, metrics to be calculated
    """
    model = Sequential(name = "LSTM")
    # a) Embeddings: add the kind of embedding the user requires.
    # 1. Any embedding already built by the user (such as GloVe or Keras):
    if embedding:
        model.add(embedding)
    # 2. Default: no embedding. We specify the shape as
    # (number of timesteps, n_features). With only text, n_features = 1.
    # Batch_size is already inferred by TF while fitting.
    else:
        model.add(Input(shape = (maxlen, 1), name = "Input"))
    # b) LSTM bidirectional layer
    model.add(Bidirectional(LSTM(rnn_units,
                                 kernel_initializer = rnn_kernel_initializer),
                            name = "Bidirectional_LSTM"))
    # c) Densely connected layer, where regularization can be applied
    if regularize:
        model.add(Dense(hidden_dense_units,
                        kernel_initializer = dense_kernel_initializer,
                        kernel_regularizer = l1_l2(),
                        name = "Regularized_hidden_dense"))
    else:
        model.add(Dense(hidden_dense_units,
                        kernel_initializer = dense_kernel_initializer,
                        name = "Hidden_dense"))
    # d) Batch normalization layer, added if specified:
    if batch_normalize:
        model.add(BatchNormalization(name = "Batch_normalization"))
    # Activation function of the dense layer, applied after batch normalization
    # in case this has been specified.
    model.add(Activation("relu", name = "ReLu_activation"))
    # e) Final Dense layer:
    model.add(Dense(1, activation = "sigmoid",
                    kernel_initializer = dense_kernel_initializer,
                    name = "Output"))
    # Compile the model with the user specifications:
    model.compile(loss = loss, optimizer = optimizer, metrics = metrics)
    return model
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# GRU
def bi_gru(loss = "binary_crossentropy",
           optimizer = "adam",
           metrics = ["accuracy"], regularize = False,
           batch_normalize = False,
           embedding = None,
           maxlen = 500,
           hidden_dense_units = 256,
           dense_kernel_initializer = "glorot_uniform",
           rnn_units = 32,
           rnn_kernel_initializer = "glorot_uniform"):
    """
    Creates a GRU model designed to be used with the text data only.
    Can be built with either Keras, SpaCy, GloVe or no embedding.
    Returns a compiled Keras model of a bidirectional GRU (32*2) and 2 Dense layers (256, 1).
    There are options to include elasticnet regularization and batch normalisation layers too.
    ----------------------------------------------------------------------
    loss = str, name of loss function to use
    optimizer = Keras optimizer, set to 'adam' but any optimizer can be passed
    metrics = list of Keras metrics to evaluate with
    regularize = bool, if True adds elasticnet/l1_l2 regularisation with
                 l1 = 0.01 and l2 = 0.01
    batch_normalize = bool, if True adds batch normalisation between the hidden Dense
                      and output layers.
    embedding = None/Keras embedding instance: the type of embedding to use (SpaCy,
                GloVe, Keras or none).
    maxlen = int, shape of input (length of sequences)
    hidden_dense_units = int, number of hidden units in the hidden dense layer
    dense_kernel_initializer = str or keras.initializers object for the weights
                               of the Dense layers.
    rnn_units = int, number of hidden units in the recurrent computation of the GRU.
    rnn_kernel_initializer = str or keras.initializers object for the weights
                             of the GRU layer.
    """
    # Build model
    model = Sequential(name = "GRU")
    # Add embedding if desired
    if embedding:
        # Embedding contains input shape
        model.add(embedding)
    else:
        # Otherwise reshape data to work with GRU
        model.add(Reshape((maxlen, 1), input_shape = (maxlen, ), name = "Reshaping"))
    # Add GRU
    model.add(Bidirectional(GRU(rnn_units,
                                kernel_initializer = rnn_kernel_initializer),
                            name = "Bidirectional_GRU"))
    # Elasticnet regularised model
    if regularize:
        model.add(Dense(hidden_dense_units, name = "Linear_Dense_Elasti",
                        kernel_regularizer = l1_l2(),
                        kernel_initializer = dense_kernel_initializer))
    # Baseline model
    else:
        model.add(Dense(hidden_dense_units, name = "Linear_Dense",
                        kernel_initializer = dense_kernel_initializer))
    # Batch normalised model
    if batch_normalize:
        model.add(BatchNormalization(name = "Batch_Norm1"))
    # Apply non-linear activation, specified in this way to be consistent
    # with the original paper
    model.add(Activation("relu", name = "ReLU_Activation"))
    # Output layer
    model.add(Dense(1, activation = "sigmoid", name = "Output",
                    kernel_initializer = dense_kernel_initializer))
    # Compile model
    model.compile(loss = loss, optimizer = optimizer,
                  metrics = metrics)
    return model
# -------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------
# CNN
def cnn(loss = "binary_crossentropy",
        optimizer = "adam",
        metrics = ["accuracy"], regularize = False,
        batch_normalize = False,
        embedding = None,
        maxlen = 500,
        conv_filters = 256,
        conv_kernel_size = 8,
        pool_size = 2,
        hidden_dense_units = 256,
        dense_kernel_initializer = "he_uniform"):
    """
    Creates a Convolutional Neural Network designed to be used with the text data only.
    Can be built with either Keras, SpaCy, GloVe or no embedding.
    Returns a compiled Keras model of 1 Conv1D layer, 1 MaxPooling1D layer, and 2 Dense layers (256, 1).
    There are options to include elasticnet regularization and batch normalisation layers too.
    ----------------------------------------------------------------------
    loss = str, name of loss function to use
    optimizer = Keras optimizer, set to 'adam' but any optimizer can be passed
    metrics = list of Keras metrics to evaluate with
    regularize = bool, if True adds elasticnet/l1_l2 regularisation with
                 l1 = 0.01 and l2 = 0.01
    batch_normalize = bool, if True adds batch normalisation between the hidden Dense
                      and output layers.
    embedding = None/Keras embedding instance: the type of embedding to use (SpaCy,
                GloVe, Keras or none).
    maxlen = int, shape of input (length of sequences).
    conv_filters = int, the dimensionality of the Convolutional layer output space.
    conv_kernel_size = int, specifying the length of the 1D convolution window.
    pool_size = int, size of the max pooling window.
    hidden_dense_units = int, number of hidden units in the hidden dense layer.
    dense_kernel_initializer = str or keras.initializers object for the weights
                               of the Dense layers.
    """
    # Build model
    model = Sequential(name = "CNN")
    # Add embedding if desired
    if embedding:
        # Embedding contains input shape
        model.add(embedding)
    else:
        model.add(Reshape((maxlen, 1), input_shape = (maxlen, ), name = "Reshaping"))
    # Add Conv1D layer
    model.add(Conv1D(filters = conv_filters, kernel_size = conv_kernel_size,
                     activation = 'relu', name = "Conv_layer"))
    # Add MaxPooling1D layer
    model.add(MaxPooling1D(pool_size = pool_size))
    # Flatten the pooled feature maps before passing to the Dense layers
    model.add(Flatten())
    # Elasticnet regularised model
    if regularize:
        model.add(Dense(hidden_dense_units, name = "Linear_Dense_Elasti",
                        kernel_regularizer = l1_l2(),
                        kernel_initializer = dense_kernel_initializer))
    # Baseline model
    else:
        model.add(Dense(hidden_dense_units, name = "Linear_Dense",
                        kernel_initializer = dense_kernel_initializer))
    # Batch normalised model
    if batch_normalize:
        model.add(BatchNormalization(name = "Batch_Norm1"))
    # Apply non-linear activation, specified in this way to be consistent
    # with the original paper
    model.add(Activation("relu", name = "ReLU_Activation"))
    # Output layer
    model.add(Dense(1, activation = "sigmoid", name = "Output",
                    kernel_initializer = dense_kernel_initializer))
    # Compile model
    model.compile(loss = loss, optimizer = optimizer,
                  metrics = metrics)
    return model
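
# Shape walk-through (added sketch): with the defaults and no embedding, the
# Conv1D layer turns (500, 1) into (500 - 8 + 1, 256) = (493, 256), max
# pooling halves the timesteps to (246, 256), and Flatten yields
# 246 * 256 = 62976 features for the dense head. summary() confirms these sizes.
def _demo_cnn_shapes():
    model = cnn(maxlen = 500)
    model.summary()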