forked from EricSteinberger/Deep-CFR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
paper_experiment_leduc_exploitability.py
42 lines (37 loc) · 2.29 KB
/
paper_experiment_leduc_exploitability.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from PokerRL.game.games import StandardLeduc
from DeepCFR.EvalAgentDeepCFR import EvalAgentDeepCFR
from DeepCFR.TrainingProfile import TrainingProfile
from DeepCFR.workers.driver.Driver import Driver
if __name__ == '__main__':
    # Script entry point for the Leduc exploitability experiment.
    # The run is assembled in three steps: choose the evaluation modes, build
    # the training profile, then hand both the profile and the evaluation
    # schedule to the Driver and start it.

    # One or both modes may be listed; listing both makes it possible to
    # compare them against each other.
    algo_eval_modes = (
        EvalAgentDeepCFR.EVAL_MODE_SINGLE,  # SD-CFR
        EvalAgentDeepCFR.EVAL_MODE_AVRG_NET,  # Deep CFR
    )

    profile = TrainingProfile(
        name="LEDUC_EXPLOITABILITY",
        nn_type="feedforward",

        # Buffer capacities for the advantage and average-strategy data.
        max_buffer_size_adv=1e6,
        max_buffer_size_avrg=1e6,

        # Set very high so that the eval agent is effectively never exported.
        eval_agent_export_freq=999999,

        # Per-iteration workload.
        n_traversals_per_iter=1500,
        n_batches_adv_training=750,
        n_batches_avrg_training=5000,

        # Network layer widths (same for both networks).
        n_merge_and_table_layer_units_adv=64,
        n_merge_and_table_layer_units_avrg=64,
        n_units_final_adv=64,
        n_units_final_avrg=64,

        mini_batch_size_adv=2048,
        mini_batch_size_avrg=2048,

        # "last" warm-starts the advantage net from the previous iteration's
        # weights; the average net is configured with "random".
        init_adv_model="last",
        init_avrg_model="random",

        # Pre-layers are switched off to keep both networks shallower.
        use_pre_layers_adv=False,
        use_pre_layers_avrg=False,

        game_cls=StandardLeduc,
        eval_modes_of_algo=algo_eval_modes,

        DISTRIBUTED=False,
        log_verbose=False,
    )

    # NOTE(review): presumably "br" selects best-response evaluation and 15 is
    # how often (in iterations) it runs -- confirm against the Driver.
    eval_schedule = {
        "br": 15,
    }

    # NOTE(review): n_iterations=None presumably means "no iteration limit" --
    # confirm against the Driver.
    ctrl = Driver(
        t_prof=profile,
        eval_methods=eval_schedule,
        n_iterations=None,
    )
    ctrl.run()