-
Notifications
You must be signed in to change notification settings - Fork 0
/
GA_filter.py
746 lines (540 loc) · 30.4 KB
/
GA_filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
import pandas as pd
import numpy as np
import math
from matplotlib.ticker import MaxNLocator
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.svm import LinearSVR
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost.sklearn import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.feature_selection import f_regression, mutual_info_regression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from sklearn.feature_selection import RFECV
import matplotlib.pyplot as plt
import random
import warnings
from time import time
warnings.filterwarnings('ignore')
np.set_printoptions(threshold=np.inf)
class GA(object):
    """Binary-encoded genetic algorithm for model tuning and feature selection.

    Each individual is a row of 0/1 genes.  Depending on ``purpose`` the row
    encodes hyper-parameter values, a feature mask, or both (parameter genes
    first, feature genes after them).  Fitness is the mean cross-validated
    R squared of the chosen regressor.

    Args:
        x: Training feature matrix (2-D ``numpy`` array, columns are features).
        y: Training target vector.
        model: ``'KNN'`` or ``'MLP'``.
        purpose: ``'Parameter'``, ``'Feature'`` or ``'Parameter+Feature'``.
        population_size: Number of individuals per generation.
        iter_num: Number of generations to run.
        pc: Crossover probability.
        pm: Mutation probability.
        params: Fixed estimator keyword arguments used when only features are
            searched.
        params_bound: Mapping ``name -> [lower, upper]`` of integer bounds for
            the tuned hyper-parameters.  Order matters: KNN reads
            ``n_neighbors`` then ``leaf_size``; MLP reads the hidden layer
            width first.
        cv: Number of cross-validation folds (also used as ``n_jobs``).
        filter: ``'F'`` (F score) or ``'MI'`` (mutual information) to pre-filter
            features before the search; any other value disables filtering.
            Features at or above the 75th percentile are always kept, features
            at or below the 25th percentile are always dropped, and only the
            remaining ones are searched by the GA.
    """

    def __init__(self, x, y, model, purpose, population_size, iter_num, pc, pm,
                 params=None, params_bound=None, cv=5, filter=None):
        # Data set used for fitness evaluation.
        self.x = x
        self.y = y
        # Estimator family and what the GA is searching for.
        self.model = model
        self.purpose = purpose
        self.params = params
        self.filter = filter
        self.params_bound = params_bound
        self.cv = cv
        # GA settings.
        self.population_size = population_size
        self.iter_num = iter_num
        self.pc = pc
        self.pm = pm
        # (always-kept features, GA-searched features), computed on first use.
        self._filter_cache = None

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _use_filter(self):
        """Return True when a supported filter ('F' or 'MI') is configured."""
        return self.filter in ('F', 'MI')

    def _param_lengths(self):
        """Return the number of genes used to encode each tuned parameter."""
        if not self.params_bound:
            raise ValueError("params_bound is required when purpose is %r" % (self.purpose,))
        # ceil(log2(span + 1)) bits cover every integer in [lower, upper];
        # at least one bit keeps lower == upper decodable.
        lengths = [
            max(1, math.ceil(math.log2(bounds[1] - bounds[0] + 1)))
            for bounds in self.params_bound.values()
        ]
        return np.array(lengths, dtype=int)

    def _random_bits(self, length):
        """Return a random 0/1 population with ``length`` genes per row."""
        return np.random.randint(2, size=(self.population_size, int(length))).astype(int)

    def _filter_split(self):
        """Return ``(always_kept_features, ga_searched_features)`` indices.

        The split is computed once and cached on the instance so that every
        later decoding step uses the same gene-to-feature mapping as the
        population that was generated (mutual information is stochastic).
        """
        cached = getattr(self, '_filter_cache', None)
        if cached is None:
            if self.filter == 'F':
                relevance, _ = f_regression(self.x, self.y)
            else:
                relevance = mutual_info_regression(self.x, self.y)
            relevance = relevance / np.max(relevance)
            lower_features = np.where(relevance <= np.percentile(relevance, 25))[0]
            upper_features = np.where(relevance >= np.percentile(relevance, 75))[0]
            ga_features = np.delete(np.arange(0, self.x.shape[1]),
                                    np.append(lower_features, upper_features))
            cached = (upper_features, ga_features)
            self._filter_cache = cached
        return cached

    def _decode_params(self, chromosome, lengths):
        """Map the parameter genes of one chromosome to integer values."""
        values = []
        offset = 0
        for bounds, width in zip(self.params_bound.values(), lengths):
            width = int(width)
            lower_bound, upper_bound = bounds[0], bounds[1]
            gene = chromosome[offset:offset + width]
            int_value = int(''.join(str(int(bit)) for bit in gene), 2)
            # Scale the raw integer onto [lower, upper] and round to an integer.
            values.append(round(lower_bound
                                + int_value * (upper_bound - lower_bound) / (2 ** width - 1)))
            offset += width
        return values

    def _decode_features(self, bits):
        """Map the feature genes of one chromosome to selected column indices."""
        mask = np.asarray(bits) == 1
        if self._use_filter():
            must_select_features, ga_features = self._filter_split()
            return np.append(must_select_features, ga_features[mask]).astype(int)
        return np.arange(len(mask))[mask]

    def _build_model(self, param_values):
        """Create the estimator; ``param_values`` is None for fixed ``params``."""
        if self.model == 'KNN':
            if param_values is None:
                return KNeighborsRegressor(**(self.params or {}), n_jobs=5)
            return KNeighborsRegressor(n_neighbors=int(param_values[0]),
                                       leaf_size=int(param_values[1]),
                                       n_jobs=5)
        if self.model == 'MLP':
            if param_values is None:
                return MLPRegressor(**(self.params or {}), tol=1e-2)
            return MLPRegressor(hidden_layer_sizes=(int(param_values[0]),), tol=1e-2)
        raise ValueError("model must be 'KNN' or 'MLP', got %r" % (self.model,))

    def _cv_score(self, estimator, columns):
        """Return the mean cross-validated R squared, or -inf if unusable."""
        if columns is None:
            data = self.x
        else:
            if columns.size == 0:
                # An empty feature subset cannot be fitted; rank it last.
                return float('-inf')
            data = self.x[:, columns]
        cv_scores = model_selection.cross_val_score(estimator,
                                                    data,
                                                    self.y,
                                                    cv=self.cv,
                                                    scoring='r2',
                                                    n_jobs=self.cv)
        mean_score = cv_scores.mean()
        return mean_score if np.isfinite(mean_score) else float('-inf')

    def _test_r2(self, y_pred, y_test, y_scaler):
        """Return R squared on the test set, un-scaling first when possible."""
        y_pred = np.asarray(y_pred).reshape(-1, 1)
        y_true = np.asarray(y_test).reshape(-1, 1)
        if y_scaler is not None:
            y_pred = y_scaler.inverse_transform(y_pred)
            y_true = y_scaler.inverse_transform(y_true)
        return r2_score(y_true, y_pred)

    # ------------------------------------------------------------------
    # Population handling
    # ------------------------------------------------------------------
    def population_generate(self):
        """Create a random initial population.

        Returns:
            Depends on ``purpose`` (the population is always the last item):

            * ``'Parameter'``: ``(gene_lengths, population)``
            * ``'Feature'`` with filter: ``(kept_features, ga_features, population)``
            * ``'Feature'`` without filter: ``population``
            * ``'Parameter+Feature'`` with filter:
              ``(gene_lengths, kept_features, ga_features, feature_population, population)``
            * ``'Parameter+Feature'`` without filter:
              ``(gene_lengths, n_features, feature_population, population)``

        Raises:
            ValueError: If ``purpose`` is not one of the supported values or
                parameter bounds are needed but missing.
        """
        if self.purpose == 'Parameter':
            chromosome_length = self._param_lengths()
            return chromosome_length, self._random_bits(chromosome_length.sum())

        if self.purpose == 'Feature':
            if self._use_filter():
                upper_features, ga_features = self._filter_split()
                return upper_features, ga_features, self._random_bits(len(ga_features))
            return self._random_bits(self.x.shape[1])

        if self.purpose == 'Parameter+Feature':
            parameter_chromosome_length = self._param_lengths()
            parameter_population = self._random_bits(parameter_chromosome_length.sum())
            if self._use_filter():
                upper_features, ga_features = self._filter_split()
                feature_population = self._random_bits(len(ga_features))
                population = np.append(parameter_population, feature_population, axis=1).astype(int)
                return (parameter_chromosome_length, upper_features, ga_features,
                        feature_population, population)
            feature_chromosome_length = self.x.shape[1]
            feature_population = self._random_bits(feature_chromosome_length)
            population = np.append(parameter_population, feature_population, axis=1).astype(int)
            return (parameter_chromosome_length, feature_chromosome_length,
                    feature_population, population)

        raise ValueError("purpose must be 'Parameter', 'Feature' or 'Parameter+Feature', "
                         "got %r" % (self.purpose,))

    def decoding(self, population):
        """Translate chromosomes into parameter values and/or feature indices.

        The genes of the ``population`` argument itself are decoded; no new
        random population is generated.

        Returns:
            * ``'Parameter'``: float array of shape ``(n_individuals, n_params)``.
            * ``'Feature'``: list of index arrays of the selected columns.
            * ``'Parameter+Feature'``: list of integer arrays holding the
              parameter values followed by the selected column indices.
        """
        population = np.asarray(population)

        if self.purpose == 'Parameter':
            chromosome_length = self._param_lengths()
            population_value = np.empty(shape=(population.shape[0], len(chromosome_length)))
            for row, chromosome in enumerate(population):
                population_value[row, :] = self._decode_params(chromosome, chromosome_length)
            return population_value

        if self.purpose == 'Feature':
            return [self._decode_features(chromosome) for chromosome in population]

        if self.purpose == 'Parameter+Feature':
            parameter_chromosome_length = self._param_lengths()
            split = int(parameter_chromosome_length.sum())
            population_value = []
            for chromosome in population:
                parameter_values = np.array(
                    self._decode_params(chromosome[:split], parameter_chromosome_length),
                    dtype=int)
                selected_features = self._decode_features(chromosome[split:])
                population_value.append(
                    np.append(parameter_values, selected_features).astype(int))
            return population_value

        raise ValueError("purpose must be 'Parameter', 'Feature' or 'Parameter+Feature', "
                         "got %r" % (self.purpose,))

    def fitness_value(self, population_value):
        """Score every decoded individual with cross-validated R squared.

        Individuals that cannot be evaluated (no selected feature, or a
        non-finite score) receive ``-inf`` so they are never reported as best.

        Raises:
            ValueError: If ``model`` or ``purpose`` is not supported.
        """
        if self.purpose not in ('Parameter', 'Feature', 'Parameter+Feature'):
            raise ValueError("purpose must be 'Parameter', 'Feature' or 'Parameter+Feature', "
                             "got %r" % (self.purpose,))
        fitness_value = []
        for individual in population_value:
            if self.purpose == 'Feature':
                estimator = self._build_model(None)
                columns = np.asarray(individual).astype(int)
            elif self.purpose == 'Parameter':
                estimator = self._build_model(individual)
                columns = None
            else:
                # Parameter values come first, feature indices after them.
                n_params = len(self.params_bound)
                estimator = self._build_model(individual[:n_params])
                columns = np.asarray(individual[n_params:]).astype(int)
            fitness_value.append(self._cv_score(estimator, columns))
        return fitness_value

    # ------------------------------------------------------------------
    # Genetic operators
    # ------------------------------------------------------------------
    def selection(self, population, fitness_value):
        """Roulette-wheel selection of ``population_size`` individuals.

        Non-finite scores are treated as the worst finite score.  When any
        score is negative (R squared can be), all scores are shifted so the
        worst becomes zero; otherwise the raw scores are used as weights.  If
        every weight is zero the draw is uniform.
        """
        population = np.asarray(population)
        weights = np.array(fitness_value, dtype=float)
        finite = np.isfinite(weights)
        if finite.any():
            weights[~finite] = weights[finite].min()
            lowest = weights.min()
            if lowest < 0:
                weights = weights - lowest
        else:
            weights = np.ones(len(weights))
        total = weights.sum()
        if total <= 0:
            weights = np.ones(len(weights))
            total = weights.sum()

        accumulated_fitness = np.cumsum(weights / total)
        last = len(accumulated_fitness) - 1
        selected_population = np.empty(shape=(self.population_size, population.shape[1]),
                                       dtype=int)
        for slot in range(self.population_size):
            # First individual whose cumulative share exceeds the random draw;
            # clamped so rounding error can never leave a slot unfilled.
            index = int(np.searchsorted(accumulated_fitness, random.random(), side='right'))
            selected_population[slot, :] = population[min(index, last)]
        return selected_population

    def cross_over(self, population):
        """Produce offspring by one-point, two-point or uniform crossover.

        Parents are drawn uniformly at random and copied, so ``population`` is
        never modified.  With probability ``pc`` a pair is recombined using one
        of the three schemes (chosen with equal probability); otherwise the
        parents are passed through unchanged.  Each pair of children is
        complementary: every gene of the two parents ends up in exactly one
        child.
        """
        population = np.asarray(population)
        n_parents, chromosome_length = population.shape
        offspring_population = np.empty(shape=(self.population_size, chromosome_length),
                                        dtype=int)
        for i in range(0, self.population_size, 2):
            father = population[random.randrange(n_parents)].copy()
            mother = population[random.randrange(n_parents)].copy()
            child1 = father.copy()
            child2 = mother.copy()
            if chromosome_length > 1 and random.random() < self.pc:
                cross_over_type = random.random()
                if cross_over_type <= 1 / 3:
                    # One-point: swap the tails after the cut.
                    cross_point = random.randint(1, chromosome_length - 1)
                    child1[cross_point:] = mother[cross_point:]
                    child2[cross_point:] = father[cross_point:]
                elif cross_over_type <= 2 / 3:
                    # Two-point: swap the segment between two sorted cuts.
                    start, stop = sorted(random.sample(range(chromosome_length + 1), 2))
                    child1[start:stop] = mother[start:stop]
                    child2[start:stop] = father[start:stop]
                else:
                    # Uniform: swap a random subset of gene positions.
                    count = random.randint(1, chromosome_length)
                    cross_points = random.sample(range(chromosome_length), count)
                    child1[cross_points] = mother[cross_points]
                    child2[cross_points] = father[cross_points]
            offspring_population[i, :] = child1
            if i + 1 < self.population_size:  # odd population sizes drop the second child
                offspring_population[i + 1, :] = child2
        return offspring_population

    def mutation(self, population):
        """Flip a random contiguous run of genes with probability ``pm``.

        Works on a copy; the argument is left untouched.  Any gene, including
        the last one, can be part of the flipped run.
        """
        mutated = np.array(population, dtype=int)
        chromosome_length = mutated.shape[1]
        if chromosome_length == 0:
            return mutated
        for i in range(mutated.shape[0]):
            if random.random() < self.pm:
                start, stop = sorted(random.sample(range(chromosome_length + 1), 2))
                mutated[i, start:stop] = 1 - mutated[i, start:stop]
        return mutated

    def best(self, population_value, fitness_value):
        """Return ``(best_fitness, best_solution)`` of the current generation."""
        best_fitness = np.max(fitness_value)
        best_fitness_location = int(np.argmax(fitness_value))
        return best_fitness, population_value[best_fitness_location]

    # ------------------------------------------------------------------
    # Driver and reporting
    # ------------------------------------------------------------------
    def plot(self, results):
        """Plot the best fitness found so far against the generation number."""
        generations = list(range(1, len(results) + 1))
        plt.plot(generations, list(results))
        plt.xlabel('Number of Iteration')
        plt.ylabel('Cross Validation Score (R squared)')
        # Integer tick locations on both axes.
        plt.gca().xaxis.set_major_locator(MaxNLocator(integer=True))
        plt.gca().yaxis.set_major_locator(MaxNLocator(integer=True))
        if self.purpose == 'Parameter':
            plt.title('GA_' + self.model + ' for ' + 'Hyperparameter Tuning')
        else:
            plt.title('GA_' + self.model + ' for ' + 'Feature Selection')
        plt.show()

    def run(self):
        """Run the GA for ``iter_num`` generations and return the best solution.

        Prints the running optimum after every generation, plots the
        convergence curve and returns the decoded best individual.

        Raises:
            ValueError: If ``iter_num`` is smaller than 1.
        """
        if self.iter_num < 1:
            raise ValueError("iter_num must be at least 1")

        results = []
        parameters = []
        # R squared may be negative, so start below every possible score.
        best_fitness = float('-inf')
        best_parameters = []

        generated = self.population_generate()
        # The population is the last item of every tuple variant.
        population = generated[-1] if isinstance(generated, tuple) else generated

        for generation in range(self.iter_num):
            population_value = self.decoding(population)
            fitness_value = self.fitness_value(population_value)
            current_fitness, current_parameters = self.best(population_value, fitness_value)
            if current_fitness > best_fitness:
                best_fitness = current_fitness
                best_parameters = current_parameters
            print('iteration is :', generation + 1, ';最佳参数:', best_parameters, ';最佳适应值', best_fitness)
            results.append(best_fitness)
            parameters.append(best_parameters)
            # Next generation: selection -> crossover -> mutation.
            after_selection_population = self.selection(population, fitness_value)
            after_cross_over_population = self.cross_over(after_selection_population)
            population = self.mutation(after_cross_over_population)

        global_optimal_results = max(results)
        global_optimal_solution = parameters[results.index(global_optimal_results)]
        print('全局最优结果为: ', global_optimal_results)
        print('全局最优参数为: ', global_optimal_solution)
        # ``results`` holds a running maximum, so it is already non-decreasing.
        self.plot(results)
        return global_optimal_solution

    def main(self, ga_results, params, x_train, y_train, x_test, y_test,
             n_iter=None, y_scaler=None):
        """Fit the final model from a GA result and report test R squared.

        For ``purpose == 'Feature'`` the selected columns are used and the
        hyper-parameters are refined with a randomized search over ``params``.
        For ``purpose == 'Parameter'`` the GA values are used directly on all
        columns.  Other purposes are not handled and return ``None``.

        Args:
            ga_results: Output of :meth:`run`.
            params: Parameter distributions for the randomized search.
            x_train, y_train, x_test, y_test: Train/test split (numpy arrays).
            n_iter: Number of randomized-search draws.  Defaults to the
                module-level ``iter`` set by the script, else 10.
            y_scaler: Fitted scaler used to un-scale targets before scoring.
                Defaults to the module-level ``y_scale`` set by the script;
                when neither exists the score is computed on the given scale.

        Returns:
            The test R squared, or ``None`` for an unhandled purpose.
        """
        # Kept for backward compatibility: the fitted estimator is published
        # as the module-level name ``model``.
        global model
        if n_iter is None:
            script_value = globals().get('iter')
            n_iter = script_value if isinstance(script_value, int) else 10
        if y_scaler is None:
            y_scaler = globals().get('y_scale')

        if self.purpose == 'Feature':
            columns = np.asarray(ga_results).astype(int)
            if self.model == 'KNN':
                base_estimator = KNeighborsRegressor()
            elif self.model == 'MLP':
                base_estimator = MLPRegressor()
            else:
                raise ValueError("model must be 'KNN' or 'MLP', got %r" % (self.model,))
            search = RandomizedSearchCV(estimator=base_estimator, param_distributions=params,
                                        cv=5, scoring='r2', n_iter=n_iter, n_jobs=-1)
            search.fit(x_train[:, columns], y_train)
            best_params = search.best_params_
            if self.model == 'KNN':
                model = KNeighborsRegressor(n_neighbors=best_params['n_neighbors'],
                                            leaf_size=best_params['leaf_size'], n_jobs=-1)
            else:
                model = MLPRegressor(hidden_layer_sizes=best_params['hidden_layer_sizes'])
            model.fit(x_train[:, columns], y_train)
            r2 = self._test_r2(model.predict(x_test[:, columns]), y_test, y_scaler)
            print('R square: ', "%.3f" % r2)
            return r2

        if self.purpose == 'Parameter':
            if self.model == 'KNN':
                model = KNeighborsRegressor(n_neighbors=int(ga_results[0]),
                                            leaf_size=int(ga_results[1]), n_jobs=-1)
            elif self.model == 'MLP':
                model = MLPRegressor(hidden_layer_sizes=(int(ga_results[0]),))
            else:
                raise ValueError("model must be 'KNN' or 'MLP', got %r" % (self.model,))
            model.fit(x_train, y_train)
            # The model was fitted on every column, so predict on every column.
            r2 = self._test_r2(model.predict(x_test), y_test, y_scaler)
            print('R square for ' + self.model, "%.3f" % r2)
            return r2

        return None
if __name__ == "__main__":
    # Load the data set: every column but the last is a feature, the last
    # column is the regression target.
    data = pd.read_csv('Final_data.csv')
    housing_x = data.iloc[:, :-1]
    housing_y = data.iloc[:, -1].values.reshape(-1, 1)

    # Standardise features and target; the fitted scalers stay available as
    # module-level names (GA.main reads ``y_scale``).
    x_scale = StandardScaler()
    x_data = x_scale.fit_transform(housing_x.values)
    y_scale = StandardScaler()
    y_data = y_scale.fit_transform(housing_y)

    # Hold out a fraction of the rows for testing.
    size = 0.2
    x_train, x_test, y_train, y_test = train_test_split(
        x_data, y_data.ravel(), test_size=size, random_state=0)

    # Number of draws for the randomized search (GA.main reads this name).
    iter = 20
    # RFECV settings.
    step = 3
    cv = 5

    # Example bounds for purpose='Parameter':
    #   params_bound={'n_neighbors': [3, 15], 'leaf_size': [10, 50]}
    # GA configured for feature selection with a mutual-information filter.
    ga = GA(
        x=x_train,
        y=y_train,
        model='KNN',
        purpose='Feature',
        params={'n_neighbors': 5, 'leaf_size': 30},
        population_size=20,
        iter_num=50,
        pc=0.8,
        pm=0.2,
        filter='MI',
    )
    population = ga.population_generate()