-
Notifications
You must be signed in to change notification settings - Fork 0
/
grid_search.py
56 lines (51 loc) · 2.18 KB
/
grid_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
from time import time

import numpy as np

from agents.TD_lambda import TD_lambda
from environments.boyans_chain import boyans_chain
# SWEEP OVER STEPSIZE AND LAMBDA
# Evaluate only the last half of each run: of the 50k total episodes, compute the AUC over the final 25k episodes only.
# stepsize of 0.0001 is too small. 0.00025 works fine - probably just the best.
# Think of continuous (stepsize) as well as discrete (lambda) hyperparams to sweep over.
# Check how many runs are essential to get statistical significance
# Takes 5000 sec = 83 min to run normal TD lambda=0.4
# Takes 5880 sec = 98 min to run true online TD lambda=0.4
# Opt for the settings with lower run times like normal vs true online TD lambda
# Compare performances of normal vs true online TD lambda for fun
# Just run normal TD lambda with best performance for 100 runs sequentially, without any other setting in parallel - time = 1500 sec = 25 min
# Post on hyperparams channel and ask for how to tackle cross entropy optimization
# Get Parameter Study on stepsize vs lambda vs performance
# Measure the wall clock time
# Timed experiment: run TD(lambda) on Boyan's chain for `num_runs` runs of
# `num_episodes` episodes each, then write the elapsed wall-clock time (in
# seconds) to a file under a directory named after the hyperparameters.

truevalue_weights = [-24, -16, -8, 0]  # forwarded to agent.step on every transition
gamma = 1.0
length_observation = 4
num_episodes = 50000
num_runs = 1
start = time()  # the timer covers agent/environment construction and all runs
lmbda = 0.4
# NOTE(review): `stepsize` is only used to label the output directory below;
# the agent is constructed with `alpha`, not `stepsize`. Confirm that the
# directory label matches the step size the agent actually uses.
stepsize = 0.00025
alpha = 0.002
# presumably Adam-style decay rates — TODO confirm against TD_lambda
beta1 = 0.9
beta2 = 0.999

for run in range(num_runs):
    print(run)
    environment = boyans_chain()
    agent = TD_lambda(gamma, lmbda, alpha, beta1, beta2, length_observation)
    for episode in range(num_episodes):
        # Distinct seed for every (run, episode) pair; the same seed is given
        # to both the environment and the agent.
        seed = run * num_episodes + episode
        # The agent is told on every step of the final episode that the run
        # is ending. This depends only on `episode`, so compute it once here
        # instead of on every step of the inner loop.
        endofrun = episode == num_episodes - 1
        endofepisode = False
        observation, fullstate = environment.start(seed)
        action = agent.start(observation, seed)
        while not endofepisode:
            observation, reward, endofepisode, fullstate, mapping = environment.step(action)
            action, lossendofepisode = agent.step(
                observation, reward, endofepisode, endofrun, run,
                truevalue_weights, mapping,
            )
end = time()

# Make sure the output directory exists so a long run is not lost to a
# FileNotFoundError when the timing is written.
output_dir = 'Data/boyans_chain/' + 'lambda=' + str(lmbda) + '_' + 'stepsize=' + str(stepsize)
os.makedirs(output_dir, exist_ok=True)
datapath = output_dir + '/time'
# The context manager closes the file even if the write fails.
with open(datapath, 'w') as time_file:
    time_file.write(str(end - start))