-
Notifications
You must be signed in to change notification settings - Fork 0
/
agent.py
123 lines (94 loc) · 3.45 KB
/
agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import random
from user_tokens import UserTokens as user
class Agent(object):
    '''
    Tic-tac-toe playing agent that learns state values through
    reinforcement learning, choosing moves epsilon-greedily.
    '''
    def __init__(self, user_token):
        '''
        Set up the learning parameters and the state-value table.

        user_token -- the token this agent plays as; compared against
                      board.game_status() to recognize a win
        '''
        # A dictionary mapped as tokenized state => learned value
        self.move_values = {}
        # Exploration rate: how often the agent will pick a random move
        self.epsilon = 0.5
        self.moves_taken = 0
        # Discount factor for the value update
        self.gamma = 0.9
        # Define what token this agent plays as
        self.token = user_token
        # Initial values assigned to newly seen states by game outcome
        self.winning_value = 3
        self.available_value = 0.1
        self.draw_value = -1
        self.losing_value = -2
        # Keep track of the last state/value the agent saw, used by the
        # value update in next_move
        self.last_state = None
        self.last_value = 0

    def set_learn_rate(self, rate):
        '''
        Public setter for the exploration rate.

        NOTE(review): despite the name, this sets epsilon (the
        exploration probability), not a learning rate. Name kept for
        backward compatibility with existing callers.
        '''
        self.epsilon = rate

    def choose_random(self, board):
        '''
        Return a uniformly random available space on the board.
        Raises IndexError if the board has no available spaces.
        '''
        return random.choice(board.available_spaces())

    def set_user(self, user):
        '''
        Set the token this agent plays as.
        '''
        self.token = user

    def get_value_of_board(self, board):
        '''
        Return the learned value of the board's current state,
        initializing it from the game status on first sight.
        '''
        tokenized = board.tokenize()
        # Idiom fix: "x not in d" rather than "not x in d" (PEP 8)
        if tokenized not in self.move_values:
            self.add_key(board, tokenized)
        return self.move_values[tokenized]

    def add_key(self, board, key):
        '''
        Initialize the value of a newly seen state from its game status
        (win / still playable / draw / loss).
        '''
        game_status = board.game_status()
        if game_status == self.token:
            self.move_values[key] = self.winning_value
        elif game_status == user.available:
            self.move_values[key] = self.available_value
        elif game_status == user.draw:
            self.move_values[key] = self.draw_value
        else:
            self.move_values[key] = self.losing_value

    def next_move(self, board):
        '''
        Decide the next move with epsilon-greedy reinforcement learning.

        With probability epsilon, returns a random available space
        (exploration). Otherwise tries every available space, returns
        the one whose resulting state has the highest learned value,
        and nudges the previous state's value toward that best value.
        Returns -1 if no space is available on the exploitation path.
        '''
        # Remember the state (and its value) before moving, for the update below
        self.last_state = board.tokenize()
        self.last_value = self.get_value_of_board(board)
        # Uses epsilon to choose if it will explore or not
        if random.random() <= self.epsilon:
            return self.choose_random(board)
        # Learning move: score each candidate and keep the best
        highest_move = -1  # returned unchanged when the board is full
        highest_value_of_move = -100000
        original_board = board.tokenize()
        # Goes through each available space and checks the value of the
        # state it would produce
        for space in board.available_spaces():
            board.take_space(self.token, space)
            value = self.get_value_of_board(board)
            # If the value is higher than the current highest, set new highest
            if value > highest_value_of_move:
                highest_move = space
                highest_value_of_move = value
            # Undo the trial move, returning the board to the original
            board.set_board_from_string(original_board)
        # Based on the best reachable value, adjust the previous state's value
        if self.last_state is not None:
            self.move_values[self.last_state] += self.gamma * (
                highest_value_of_move - self.last_value)
        self.moves_taken += 1
        return highest_move