
Commit

nplayer lola doesn't learn
alexandrasouly committed Aug 25, 2023
1 parent 3b675a9 commit 95edb08
Showing 7 changed files with 289 additions and 108 deletions.
10 changes: 5 additions & 5 deletions pax/conf/experiment/lola/lola_vs_ppo_mem.yaml
@@ -2,7 +2,7 @@

# Agents
agent1: 'LOLA'
- agent2: 'PPO_memory'
+ agent2: 'Tabular'

# Environment
# env_id: iterated_matrix_game
@@ -14,23 +14,23 @@ env_id: iterated_nplayer_tensor_game
env_type: sequential
env_discount: 0.96
payoff_table: [
- [ 0 , 1000 ],
- [ -3 , -1 ],
+ [ -1 , 1000 ],
+ [ -3 , 0 ],
[ 1000 , -2 ],
]
runner: tensor_rl_nplayer
num_players: 2


- num_envs: 12
+ num_envs: 100
num_opps: 1
num_outer_steps: 1
num_inner_steps: 100 # how long a game takes
num_iters: 10000

# LOLA agent parameters
lola:
- use_baseline: True
+ use_baseline: False
adam_epsilon: 1e-5
lr_in: 0.03
lr_out: 0.005
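A note on the LOLA parameters above: lr_in and lr_out reflect LOLA's two-level update. The agent simulates one opponent gradient step (taken with lr_in) and differentiates its own return through that step before applying its outer update with lr_out; use_baseline (flipped to False here) typically toggles a variance-reduction baseline in the gradient estimator. A minimal sketch of the lookahead, assuming exact, differentiable per-agent returns V1 and V2 (hypothetical names, not pax's API):

import jax

lr_in, lr_out = 0.03, 0.005  # values from the config above

def lola_grad(theta1, theta2, V1, V2):
    # V1, V2: assumed differentiable maps (theta1, theta2) -> expected return.
    def surrogate(t1):
        # Opponent's simulated inner step: one ascent step on its own return.
        t2_step = theta2 + lr_in * jax.grad(V2, argnums=1)(t1, theta2)
        # Agent 1's return evaluated after the opponent's lookahead step.
        return V1(t1, t2_step)
    # Differentiating through the lookahead step is what separates LOLA
    # from a naive learner.
    return jax.grad(surrogate)(theta1)

# Outer ascent step: theta1 = theta1 + lr_out * lola_grad(theta1, theta2, V1, V2)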
2 changes: 1 addition & 1 deletion pax/conf/experiment/lola/ppo_mem_baseline.yaml
@@ -66,4 +66,4 @@ wandb:
project: lola
group: 'LOLA-vs-${agent2}'
name: ppo_mem_baseline
- log: False
+ log: True
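For orientation, these wandb fields map naturally onto a wandb.init call; a minimal sketch, assuming the standard wandb Python API (treating log: as a switch between online and disabled modes is an assumption, not pax's code):

import wandb

agent2 = "PPO_memory"  # placeholder; Hydra resolves ${agent2} at config load

run = wandb.init(
    entity="ucl-dark",
    project="lola",
    group=f"LOLA-vs-{agent2}",
    name="ppo_mem_baseline",
    mode="online",  # log: True; mode="disabled" would mirror log: False
)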
73 changes: 73 additions & 0 deletions pax/conf/experiment/lola/ppo_mem_baseline2.yaml
@@ -0,0 +1,73 @@
# @package _global_

# Agents
agent1: 'PPO_memory'
agent2: 'PPO_memory'

# Environment
env_id: iterated_nplayer_tensor_game
env_type: sequential
env_discount: 0.96
payoff_table: [
[-1 , 1000 ],
[ -3 , 0 ],
[ 1000 , -2 ],
]
runner: tensor_rl_nplayer
num_players: 2

num_envs: 512
num_opps: 1
num_inner_steps: 100 # how long a game takes
num_outer_steps: 1 # how many games they are playing
num_iters: 1000


ppo1:
num_minibatches: 10
num_epochs: 4
gamma: 0.96
gae_lambda: 0.95
ppo_clipping_epsilon: 0.2
value_coeff: 0.5
clip_value: True
max_gradient_norm: 0.5
anneal_entropy: True
entropy_coeff_start: 0.1
entropy_coeff_horizon: 0.25e9
entropy_coeff_end: 0.05
lr_scheduling: True
learning_rate: 3e-4
adam_epsilon: 1e-5
with_memory: True
hidden_size: 16
with_cnn: False

ppo2:
num_minibatches: 10
num_epochs: 4
gamma: 0.96
gae_lambda: 0.95
ppo_clipping_epsilon: 0.2
value_coeff: 0.5
clip_value: True
max_gradient_norm: 0.5
anneal_entropy: True
entropy_coeff_start: 0.1
entropy_coeff_horizon: 0.25e9
entropy_coeff_end: 0.05
lr_scheduling: True
learning_rate: 3e-4
adam_epsilon: 1e-5
with_memory: True
hidden_size: 16
with_cnn: False


# Logging setup
wandb:
entity: "ucl-dark"
project: lola
group: 'LOLA-vs-${agent2}'
name: ppo_mem_baseline_nplayer_runnr
log: True
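One detail worth unpacking in this new config: with anneal_entropy: True, the entropy bonus decays from entropy_coeff_start (0.1) to entropy_coeff_end (0.05) over entropy_coeff_horizon (0.25e9) steps. A minimal sketch of one way to express that schedule with optax; the linear ramp is an assumption, pax may anneal on a different curve:

import optax

entropy_coeff = optax.linear_schedule(
    init_value=0.1,                # entropy_coeff_start
    end_value=0.05,                # entropy_coeff_end
    transition_steps=250_000_000,  # entropy_coeff_horizon (0.25e9)
)

entropy_coeff(0)            # 0.1 at the first update
entropy_coeff(250_000_000)  # 0.05 once the horizon is reached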
4 changes: 2 additions & 2 deletions pax/experiment.py
@@ -91,9 +91,9 @@
# NOTE: THIS MUST BE DONE BEFORE IMPORTING JAX
# uncomment to debug multi-devices on CPU
# os.environ["XLA_FLAGS"] = "--xla_force_host_platform_device_count=2"
- from jax.config import config
+ # from jax.config import config

- config.update("jax_disable_jit", True)
+ # config.update("jax_disable_jit", True)


def global_setup(args):
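The two toggled lines above are the debugging hook this commit switches off: disabling JIT makes JAX execute op by op, so Python breakpoints and prints work inside otherwise-traced code. A minimal sketch of the same switch plus JAX's scoped alternative (jax.disable_jit is standard JAX, not pax-specific):

import jax
import jax.numpy as jnp

# Global switch, equivalent to the lines commented out in experiment.py:
jax.config.update("jax_disable_jit", True)

# Scoped alternative: run eagerly only inside this block.
with jax.disable_jit():
    y = jax.jit(lambda x: x * 2.0)(jnp.ones(3))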
(3 more changed files not shown)
