-
Notifications
You must be signed in to change notification settings - Fork 83
/
Genetic Algorithm for Reinforcement Learning.py
121 lines (84 loc) · 3.09 KB
/
Genetic Algorithm for Reinforcement Learning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import numpy as np
import matplotlib.pyplot as plt
# Set the default figure size (width 8, height 6) for every matplotlib
# plot created after this point.
plt.rcParams['figure.figsize'] = [8, 6]
# [low, high] bounds for each of the two features. populate() unpacks each
# pair into np.random.randint, so the upper bound is exclusive.
x1_range = [-100, 100]
x2_range = [-100, 100]
# Placeholder for the population; the name is rebound further down to the
# array returned by populate().
population = []
def populate(features, size=1000):
    """Generate a random initial population.

    Args:
        features: Iterable of bounds, one entry per feature. Each entry is
            unpacked into ``np.random.randint``, so ``[low, high]`` draws
            integers from ``low`` (inclusive) up to ``high`` (exclusive).
        size: Number of entities to generate.

    Returns:
        Integer array of shape ``(size, number_of_features)``; row ``i``
        holds the coordinates of entity ``i``.
    """
    # One vectorized draw per feature instead of one call per coordinate.
    columns = [np.random.randint(*bounds, size=size) for bounds in features]
    if not columns:
        # No features requested: keep the result two-dimensional.
        return np.empty((size, 0), dtype=int)
    # Stacking along axis 1 keeps the (size, n_features) shape even when
    # size is 0.
    return np.stack(columns, axis=1)
# The target point ("virus") as a NumPy array. fitness() scores each entity
# by its squared distance to this point.
virus = np.array([5, 5])
def fitness(population, virus, size=100):
    """Return the indices of the entities closest to the virus.

    Each entity is scored by its squared Euclidean distance to ``virus``;
    a lower score means a fitter entity.

    Args:
        population: Sequence or array of entities, one per row.
        virus: Target coordinates, one value per feature.
        size: Maximum number of indices to return.

    Returns:
        1-D integer array with the indices of the ``size`` best entities,
        ordered from best to worst. Ties are broken by the lower index.
        An empty population yields an empty array.
    """
    points = np.asarray(population)
    if len(points) == 0:
        return np.empty(0, dtype=np.intp)
    offsets = points.reshape(len(points), -1) - np.ravel(virus)
    scores = np.sum(offsets ** 2, axis=1)
    # A stable sort keeps equal scores in index order, and argsort always
    # yields integer indices, whatever the dtype of the population.
    return np.argsort(scores, kind="stable")[:size]
def draw(population, virus):
    """Scatter-plot the population in green and the virus in red.

    Both axes are fixed to the range -100..100. ``population`` is indexed
    as a 2-D array whose first two columns are the x and y coordinates.
    """
    axis_limits = (-100, 100)
    plt.xlim(axis_limits)
    plt.ylim(axis_limits)
    xs = population[:, 0]
    ys = population[:, 1]
    plt.scatter(xs, ys, c='green', s=12)
    # The virus is drawn last, with a larger marker than the entities.
    plt.scatter(virus[0], virus[1], c='red', s=60)
def reduction(population, virus, size=100):
    """Keep only the fittest entities of the population.

    ``fitness`` supplies the indices of the best ``size`` entities in
    ranked order; the matching rows are gathered into a new array in that
    same order.
    """
    survivors = fitness(population, virus, size)
    return np.array([population[idx] for idx in survivors])
def cross(population, size=1000):
    """Breed a new generation by single-point crossover.

    Every child takes the first half of its features from one randomly
    chosen parent and the remaining features from another. Parents are
    drawn with replacement, so the two may be the same entity. The split
    point is always the middle of the feature vector.

    Args:
        population: 2-D array (or nested sequence) of parents, one per row.
        size: Number of children to produce.

    Returns:
        Array of shape ``(size, number_of_features)``.

    Raises:
        ValueError: If ``population`` is empty.
    """
    parents = np.asarray(population)
    if len(parents) == 0:
        raise ValueError("cannot cross an empty population")
    half = parents.shape[1] // 2
    first = np.random.randint(0, len(parents), size=size)
    second = np.random.randint(0, len(parents), size=size)
    # Joining the two slices works for any number of features, unlike
    # appending a single unpacked element from each half.
    return np.hstack((parents[first, :half], parents[second, half:]))
def mutate(population):
    """Add random integer noise to every coordinate of the population.

    Each coordinate is shifted by an independent integer drawn from
    ``-10`` (inclusive) to ``10`` (exclusive), which spreads the entities
    out again after crossover.

    Args:
        population: Array (or nested sequence) of entities.

    Returns:
        New array with the same shape as ``population``.
    """
    population = np.asarray(population)
    # Match the noise to the population's own shape instead of assuming
    # exactly 1000 entities with 2 features.
    noise = np.random.randint(-10, 10, size=population.shape)
    return population + noise
# Build the initial population: 1000 entities, each with one coordinate
# drawn from x1_range and one from x2_range.
population = populate([x1_range, x2_range], 1000)
def cycle(population, virus, gens=1):
    """Run the full evolutionary loop for ``gens`` generations.

    Each generation keeps the 100 fittest entities, breeds 1000 children
    from them, and then mutates those children. With ``gens=0`` the
    population is returned unchanged.
    """
    for _ in range(gens):
        survivors = reduction(population, virus, 100)
        offspring = cross(survivors, 1000)
        population = mutate(offspring)
    return population
# Evolve the population by one generation (cycle's default gens=1), then
# plot the result together with the virus.
# NOTE(review): plt.show() is never called; presumably this was run in a
# notebook with inline plotting -- confirm before running it as a script.
population = cycle(population, virus)
draw(population, virus)