"""
mdpAgents.py

A Pacman agent that plans with value iteration over an MDP model of the maze:
a reward map is built each turn, Bellman updates are swept over it, and the
agent takes the legal move leading to the highest-valued neighbouring cell.
"""
import api
import util

from game import Agent
from pacman import Directions

# Reward-function and value-iteration parameters
EMPTY_LOCATION_REWARD = -0.04   # living cost for plain cells
FOOD_REWARD = 10
CAPSULE_REWARD = 100
GHOST_REWARD = -1000
GAMMA = 0.9                     # discount factor
DANGER_ZONE_RATIO = 6           # bounds the ghost search to h * w / 6 cells
DANGER = 500                    # base penalty applied around ghosts
ITERATIONS = 10                 # value-iteration sweeps per move
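
# A quick sanity check of the update these constants drive (numbers worked by
# hand, not executed): for an empty cell whose best neighbour currently holds
# the value FOOD_REWARD, one Bellman step gives
#   v = EMPTY_LOCATION_REWARD + GAMMA * (0.8 * FOOD_REWARD + 0.1 * v_left + 0.1 * v_right)
# and with v_left = v_right = EMPTY_LOCATION_REWARD this is
#   -0.04 + 0.9 * (8.0 - 0.008) = 7.1528
# so food pulls neighbouring values up sharply within a single sweep.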
class MDPAgent(Agent):
    def __init__(self):
        self.map = self.walls = self.corners = None

    def registerInitialState(self, state):
        # Cache the static layout and build the initial value map.
        self.walls = api.walls(state)
        self.corners = api.corners(state)
        self.map = initial_map(self.corners, self.walls)

    def getAction(self, state):
        if self.map is None:
            self.registerInitialState(state)
        # Re-plan every move: run value iteration, then take the legal move
        # that leads to the highest-valued neighbouring cell.
        self.map = value_iteration(self.map, state)
        legal = api.legalActions(state)
        if Directions.STOP in legal:
            legal.remove(Directions.STOP)
        pacman = api.whereAmI(state)
        [scores, actions] = get_action_scores(legal, self.map, pacman[0], pacman[1])
        max_score_index = scores.index(max(scores))
        choice = actions[max_score_index]
        return api.makeMove(choice, legal)
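
# The agent is meant to be launched from the Berkeley/KCL Pacman framework,
# along the lines of (layout name illustrative):
#   python pacman.py -p MDPAgent -l mediumClassic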
def get_action_scores(legal, pacman_map, x, y):
    """
    Get a list containing two lists: scores and actions.
    Scores contains the Bellman value of the cell each action leads to.
    Both lists use the same indexing order (i.e. the first element in scores
    is the score for the first element in actions).
    @param legal: the legal moves
    @type legal: list
    @param pacman_map: the map containing the Bellman values (two-dimensional list, indexed [y][x])
    @type pacman_map: list
    @param x: the x position of pacman
    @type x: int
    @param y: the y position of pacman
    @type y: int
    @return: the list containing the scores and actions results: [scores, actions]
    @rtype: list
    """
    scores = []
    actions = []
    for action in legal:
        value = None
        if action is Directions.NORTH:
            value = pacman_map[y + 1][x]
        elif action is Directions.SOUTH:
            value = pacman_map[y - 1][x]
        elif action is Directions.EAST:
            value = pacman_map[y][x + 1]
        elif action is Directions.WEST:
            value = pacman_map[y][x - 1]
        if value is not None:
            scores.append(value)
            actions.append(action)
    return [scores, actions]
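
# A minimal illustration on a hypothetical 3x3 value map (indexed [y][x]):
#   pacman_map = [[-0.04, -0.04, -0.04],
#                 [-0.04, -0.04,  7.15],
#                 [-0.04, -0.04, -0.04]]
#   get_action_scores([Directions.NORTH, Directions.EAST], pacman_map, 1, 1)
#   # -> [[-0.04, 7.15], [Directions.NORTH, Directions.EAST]]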
def value_iteration(m, state):
    """
    Apply value iteration to the given map m. The number of iterations is set
    by the global constant ITERATIONS.
    @param m: the map to apply value iteration to
    @type m: list
    @param state: the state of the game
    @type state: state
    @return: the map with the new values
    @rtype: list
    """
    iterations = ITERATIONS
    corners = api.corners(state)
    food = api.food(state)
    walls = api.walls(state)
    ghosts = api.ghosts(state)
    capsules = api.capsules(state)
    r_map = reward_map(corners, food, walls, ghosts, capsules)
    # Note: despite their names, h spans the x axis and w spans the y axis,
    # matching the [y][x] indexing used by every map in this file.
    h = corners[1][0] + 1
    w = corners[2][1] + 1
    pacman = api.whereAmI(state)
    pacman = (pacman[1], pacman[0])  # swap to (y, x) to match map indexing
    update_reward_map(r_map, pacman, ghosts, h, w)
    while iterations > 0:
        new_m = initial_map(corners, walls)
        for i in range(w):
            for j in range(h):
                r = r_map[i][j]
                new_m[i][j] = bellmann(m, (i, j), w, h, r)
        m = new_m
        iterations -= 1
    return m
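
# Each sweep reads only the previous map (new_m is built from m), so values
# propagate one cell per sweep: with ITERATIONS = 10 the agent effectively
# plans about ten steps ahead, discounted by GAMMA ** 10 (roughly 0.35) at
# the horizon.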
def bellmann(m, cell, w, h, r):
    """
    Get the updated value of a cell using the Bellman equation. The transition
    probabilities are hardcoded: 80% for the intended move and 10% for each of
    the two perpendicular moves.
    @param m: the map containing the previous values of the other cells
    @type m: list
    @param cell: the cell to compute the new value for, as (first index, second index)
    @type cell: Tuple[int, int]
    @param w: the bound for the first map index
    @type w: int
    @param h: the bound for the second map index
    @type h: int
    @param r: the reward value of the given cell as described in the reward function
    @type r: number (None for walls)
    @return: the new value for the cell according to the Bellman equation
    @rtype: float
    """
    # Within this function, x and y are simply the first and second map
    # indices ([row][col]).
    x = cell[0]
    y = cell[1]
    if r is None:  # wall: no value to compute
        return None
    east = west = north = south = None
    if x < w - 1:
        east = m[x + 1][y]
    if x > 0:
        west = m[x - 1][y]
    if y < h - 1:
        north = m[x][y + 1]
    if y > 0:
        south = m[x][y - 1]
    # Walls and off-map neighbours (None) are valued at -1, which makes
    # bumping into a wall mildly unattractive.
    if east is None:
        east = -1
    if west is None:
        west = -1
    if north is None:
        north = -1
    if south is None:
        south = -1
    # Expected value of each move: 80% intended direction, 10% each side.
    north_val = north * 0.8 + (east + west) * 0.1
    south_val = south * 0.8 + (east + west) * 0.1
    east_val = east * 0.8 + (north + south) * 0.1
    west_val = west * 0.8 + (north + south) * 0.1
    max_val = max([north_val, south_val, east_val, west_val])
    return r + GAMMA * max_val
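
# Worked example (hypothetical 2x2 map, no walls, numbers done by hand):
#   m = [[-0.04, 10.0],
#        [-0.04, -0.04]]
#   bellmann(m, (0, 0), 2, 2, -0.04)
# east = m[1][0] = -0.04, north = m[0][1] = 10.0, west = south = -1 (off-map)
# north_val = 10.0 * 0.8 + (-0.04 - 1) * 0.1 = 7.896   (the maximum)
# result   = -0.04 + 0.9 * 7.896 = 7.0664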
def update_reward_map(r_map, pacman, ghosts, h, w):
    """
    Update the reward map by applying penalties to the cells close to pacman.
    The closer a cell is to a ghost, the greater the penalty applied to its
    reward value. Only a bounded number of cells is explored per neighbour:
    h * w / DANGER_ZONE_RATIO (see distance_to_closest_ghost).
    @param r_map: the reward map without any penalties
    @type r_map: list
    @param pacman: pacman's position, as (y, x)
    @type pacman: Tuple[int, int]
    @param ghosts: a list of ghost positions
    @type ghosts: list
    @param h: the size of the map along x
    @type h: int
    @param w: the size of the map along y
    @type w: int
    """
    for n in get_neighbours(pacman, h, w):
        if n is not None and r_map[n[0]][n[1]] is not None:
            [distance, cells] = distance_to_closest_ghost(n, ghosts, h, w)
            if distance > 0:
                # the further the cell is from the ghost, the smaller the penalty
                r_map[n[0]][n[1]] -= (DANGER / distance)
                for cell in cells:
                    if r_map[cell[0]][cell[1]] is not None:
                        r_map[cell[0]][cell[1]] -= (DANGER / distance)
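
# Falloff illustration: with DANGER = 500, a neighbour whose search reaches
# the ghost after popping 2 cells takes a -250 penalty; after 25 cells, -20.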
def distance_to_closest_ghost(cell, ghosts, h, w):
    """
    Calculate the distance to the closest ghost as the number of cells explored
    before reaching the position of that ghost, using a breadth-first frontier
    search. Exploration stops after h * w / DANGER_ZONE_RATIO cells.
    @param cell: the cell from which to calculate the distance, as (y, x)
    @type cell: Tuple[int, int]
    @param ghosts: the list containing the positions of the ghosts
    @type ghosts: list
    @param h: the size of the map along x
    @type h: int
    @param w: the size of the map along y
    @type w: int
    @return: the distance to the ghost and the explored cells: [distance, cells]
    @rtype: list
    """
    frontier = util.Queue()
    frontier.push(cell)
    came_from = dict()
    came_from[cell] = None
    distance = 0
    found = False
    cells = []
    while not frontier.isEmpty() and distance < (h * w / DANGER_ZONE_RATIO):
        current = frontier.pop()
        cells.append(current)
        # "distance" counts cells popped from the queue, not path length, so
        # it grows faster than the true maze distance.
        distance += 1
        if (current[1], current[0]) in ghosts:  # ghosts are stored as (x, y)
            found = True
            break
        # Note: walls are not filtered out here, so the search can pass
        # through walls; the result is an approximate danger estimate.
        for neighbour in get_neighbours(current, h, w):
            if neighbour is not None and neighbour not in came_from:
                frontier.push(neighbour)
                came_from[neighbour] = current
    if found:
        return [distance, cells]
    else:
        return [0, cells]
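
# Budget illustration (board size assumed for the example): on a 20 x 11
# layout the loop stops after about 20 * 11 / 6 = 36 explored cells, so
# ghosts further away than that return [0, cells] and apply no penalty.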
def get_neighbours(cell, h, w):
    """
    Get the four adjacent cells. An entry is None when the neighbour would
    fall outside the map; index 0 on either axis is also excluded, which is
    safe because the outer border of a Pacman layout is always a wall.
    @param cell: the cell for which to retrieve the neighbours
    @type cell: Tuple[int, int]
    @param h: the bound for the second index
    @type h: int
    @param w: the bound for the first index
    @type w: int
    @return: list of the neighbours' coordinates
    @rtype: list
    """
    x = cell[0]
    y = cell[1]
    north = south = east = west = None
    if y + 1 < h:
        north = (x, y + 1)
    if y - 1 > 0:
        south = (x, y - 1)
    if x + 1 < w:
        east = (x + 1, y)
    if x - 1 > 0:
        west = (x - 1, y)
    return [north, south, east, west]
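
# Example: get_neighbours((1, 1), 3, 3) -> [(1, 2), None, (2, 1), None]
# (south and west are None because index 0 is excluded as border)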
def reward_map(corners, food, walls, ghosts, capsules):
    """
    Get a two-dimensional list containing the values of the reward function,
    indexed [y][x].
    @param corners: a list of the corners of the map
    @type corners: list
    @param food: a list of the food locations on the map
    @type food: list
    @param walls: a list of the walls of the map
    @type walls: list
    @param ghosts: a list of the ghost positions on the map
    @type ghosts: list
    @param capsules: a list of the capsule locations on the map
    @type capsules: list
    @return: the reward map
    @rtype: list
    """
    m = initial_map(corners, walls)
    h = corners[1][0] + 1
    w = corners[2][1] + 1
    for i in range(w):
        for j in range(h):
            cell = (j, i)  # (x, y) in game coordinates
            if cell in food:
                m[i][j] = FOOD_REWARD
            elif cell in walls:
                m[i][j] = None
            elif cell in ghosts:
                m[i][j] = GHOST_REWARD
            elif cell in capsules:
                m[i][j] = CAPSULE_REWARD
            elif cell in [(7, 6), (10, 6)]:
                # hard-coded penalty cells; these coordinates are specific to
                # one layout
                m[i][j] = -100
            else:
                m[i][j] = EMPTY_LOCATION_REWARD
    return m
def initial_map(corners, walls):
    """
    Get an empty copy of the map, indexed [y][x]. Every value is either None
    if there is a wall, or the value of the global constant
    EMPTY_LOCATION_REWARD otherwise.
    @param corners: the corners of the map
    @type corners: list
    @param walls: the walls of the map
    @type walls: list
    @return: a blank copy of the map
    @rtype: list
    """
    h = corners[1][0] + 1
    w = corners[2][1] + 1
    pacman_map = []
    for i in range(w):
        pacman_map.append([])
        for j in range(h):
            # (j, i) is (x, y) in game coordinates
            if (j, i) in walls:
                pacman_map[i].append(None)
            else:
                pacman_map[i].append(EMPTY_LOCATION_REWARD)
    return pacman_map
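
# A minimal smoke test (a sketch: the 4 x 4 layout below is made up, and
# running this file directly assumes the framework modules imported at the
# top are importable, as they are inside this repository).
if __name__ == "__main__":
    demo_corners = [(0, 0), (3, 0), (0, 3), (3, 3)]
    demo_walls = [(x, y) for x in range(4) for y in range(4)
                  if x in (0, 3) or y in (0, 3)]
    demo_map = initial_map(demo_corners, demo_walls)
    for row in reversed(demo_map):  # print top row first, y increasing upwards
        print(row)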