
Commit

nplayer lola doesn't learn
alexandrasouly committed Aug 25, 2023
1 parent 3b675a9 commit 95edb08
Showing 7 changed files with 289 additions and 108 deletions.
10 changes: 5 additions & 5 deletions pax/conf/experiment/lola/lola_vs_ppo_mem.yaml
@@ -2,7 +2,7 @@

# Agents
agent1: 'LOLA'
- agent2: 'PPO_memory'
+ agent2: 'Tabular'

# Environment
# env_id: iterated_matrix_game
@@ -14,23 +14,23 @@ env_id: iterated_nplayer_tensor_game
env_type: sequential
env_discount: 0.96
payoff_table: [
- [ 0 , 1000 ],
- [ -3 , -1 ],
+ [ -1 , 1000 ],
+ [ -3 , 0 ],
[ 1000 , -2 ],
]
runner: tensor_rl_nplayer
num_players: 2


- num_envs: 12
+ num_envs: 100
num_opps: 1
num_outer_steps: 1
num_inner_steps: 100 # how long a game takes
num_iters: 10000

# LOLA agent parameters
lola:
- use_baseline: True
+ use_baseline: False
adam_epsilon: 1e-5
lr_in: 0.03
lr_out: 0.005
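A note on the LOLA parameters above: lr_in and lr_out reflect LOLA's two-level update. The agent simulates one opponent gradient step (taken with lr_in) and differentiates its own return through that step before applying its outer update with lr_out; use_baseline (flipped to False here) typically toggles a variance-reduction baseline in the gradient estimator. A minimal sketch of the lookahead, assuming exact, differentiable per-agent returns V1 and V2 (hypothetical names, not pax's API):

import jax

lr_in, lr_out = 0.03, 0.005  # values from the config above

def lola_grad(theta1, theta2, V1, V2):
    # V1, V2: assumed differentiable maps (theta1, theta2) -> expected return.
    def surrogate(t1):
        # Opponent's simulated inner step: one ascent step on its own return.
        t2_step = theta2 + lr_in * jax.grad(V2, argnums=1)(t1, theta2)
        # Agent 1's return evaluated after the opponent's lookahead step.
        return V1(t1, t2_step)
    # Differentiating through the lookahead step is what separates LOLA
    # from a naive learner.
    return jax.grad(surrogate)(theta1)

# Outer ascent step: theta1 = theta1 + lr_out * lola_grad(theta1, theta2, V1, V2)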
2 changes: 1 addition & 1 deletion pax/conf/experiment/lola/ppo_mem_baseline.yaml
@@ -66,4 +66,4 @@ wandb:
project: lola
group: 'LOLA-vs-${agent2}'
name: ppo_mem_baseline
- log: False
+ log: True
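For orientation, these wandb fields map naturally onto a wandb.init call; a minimal sketch, assuming the standard wandb Python API (treating log: as a switch between online and disabled modes is an assumption, not pax's code):

import wandb

agent2 = "PPO_memory"  # placeholder; Hydra resolves ${agent2} at config load

run = wandb.init(
    entity="ucl-dark",
    project="lola",
    group=f"LOLA-vs-{agent2}",
    name="ppo_mem_baseline",
    mode="online",  # log: True; mode="disabled" would mirror log: False
)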
73 changes: 73 additions & 0 deletions pax/conf/experiment/lola/ppo_mem_baseline2.yaml
@@ -0,0 +1,73 @@
# @package _global_

# Agents
agent1: 'PPO_memory'
agent2: 'PPO_memory'

# Environment
env_id: iterated_nplayer_tensor_game
env_type: sequential
env_discount: 0.96
payoff_table: [
[-1 , 1000 ],
[ -3 , 0 ],
[ 1000 , -2 ],
]
runner: tensor_rl_nplayer
num_players: 2

num_envs: 512
num_opps: 1
num_inner_steps: 100 # how long a game takes
num_outer_steps: 1 # how many games they are playing
num_iters: 1000


ppo1:
num_minibatches: 10
num_epochs: 4
gamma: 0.96
gae_lambda: 0.95
ppo_clipping_epsilon: 0.2
value_coeff: 0.5
clip_value: True
max_gradient_norm: 0.5
anneal_entropy: True
entropy_coeff_start: 0.1
entropy_coeff_horizon: 0.25e9
entropy_coeff_end: 0.05
lr_scheduling: True
learning_rate: 3e-4
adam_epsilon: 1e-5
with_memory: True
hidden_size: 16
with_cnn: False

ppo2:
num_minibatches: 10
num_epochs: 4
gamma: 0.96
gae_lambda: 0.95
ppo_clipping_epsilon: 0.2
value_coeff: 0.5
clip_value: True
max_gradient_norm: 0.5
anneal_entropy: True
entropy_coeff_start: 0.1
entropy_coeff_horizon: 0.25e9
entropy_coeff_end: 0.05
lr_scheduling: True
learning_rate: 3e-4
adam_epsilon: 1e-5
with_memory: True
hidden_size: 16
with_cnn: False


# Logging setup
wandb:
entity: "ucl-dark"
project: lola
group: 'LOLA-vs-${agent2}'
name: ppo_mem_baseline_nplayer_runnr
log: True
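One detail worth unpacking in this new config: with anneal_entropy: True, the entropy bonus decays from entropy_coeff_start (0.1) to entropy_coeff_end (0.05) over entropy_coeff_horizon (0.25e9) steps. A minimal sketch of one way to express that schedule with optax; the linear ramp is an assumption, pax may anneal on a different curve:

import optax

entropy_coeff = optax.linear_schedule(
    init_value=0.1,                # entropy_coeff_start
    end_value=0.05,                # entropy_coeff_end
    transition_steps=250_000_000,  # entropy_coeff_horizon (0.25e9)
)

entropy_coeff(0)            # 0.1 at the first update
entropy_coeff(250_000_000)  # 0.05 once the horizon is reached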
4 changes: 2 additions & 2 deletions pax/experiment.py
@@ -91,9 +91,9 @@
# NOTE: THIS MUST BE DONE BEFORE IMPORTING JAX
# uncomment to debug multi-devices on CPU
# os.environ["XLA_FLAGS"] = "--xla_force_host_platform_device_count=2"
- from jax.config import config
+ # from jax.config import config

- config.update("jax_disable_jit", True)
+ # config.update("jax_disable_jit", True)


def global_setup(args):
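The two toggled lines above are the debugging hook this commit switches off: disabling JIT makes JAX execute op by op, so Python breakpoints and prints work inside otherwise-traced code. A minimal sketch of the same switch plus JAX's scoped alternative (jax.disable_jit is standard JAX, not pax-specific):

import jax
import jax.numpy as jnp

# Global switch, equivalent to the lines commented out in experiment.py:
jax.config.update("jax_disable_jit", True)

# Scoped alternative: run eagerly only inside this block.
with jax.disable_jit():
    y = jax.jit(lambda x: x * 2.0)(jnp.ones(3))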
(3 more changed files not shown)
