Commit dd6555b

restructuring and deleting some runners
Aidandos committed Oct 24, 2023
1 parent 97c97e5 commit dd6555b
Showing 10 changed files with 31 additions and 1,321 deletions.

.gitignore (5 additions, 0 deletions)
@@ -114,3 +114,8 @@ experiment.log
 
 # Pax
 pax/version.py
+
+*.gif
+*.json
+*.png
+*.sh

pax/experiment.py (7 additions, 7 deletions)
@@ -65,13 +65,13 @@
 from pax.runners.runner_evo import EvoRunner
 from pax.runners.runner_evo_multishaper import MultishaperEvoRunner
 from pax.runners.runner_evo_hardstop import EvoHardstopRunner
-from pax.runners.runner_evo_mixed_lr import EvoMixedLRRunner
-from pax.runners.runner_evo_mixed_payoffs import EvoMixedPayoffRunner
-from pax.runners.runner_evo_mixed_IPD_payoffs import EvoMixedIPDPayoffRunner
-from pax.runners.runner_evo_mixed_payoffs_input import EvoMixedPayoffInputRunner
-from pax.runners.runner_evo_mixed_payoffs_gen import EvoMixedPayoffGenRunner
-from pax.runners.runner_evo_mixed_payoffs_pred import EvoMixedPayoffPredRunner
-from pax.runners.runner_evo_mixed_payoffs_only_opp import EvoMixedPayoffOnlyOppRunner
+from pax.runners.experimental.runner_evo_mixed_lr import EvoMixedLRRunner
+from pax.runners.experimental.runner_evo_mixed_payoffs import EvoMixedPayoffRunner
+from pax.runners.experimental.runner_evo_mixed_IPD_payoffs import EvoMixedIPDPayoffRunner
+from pax.runners.experimental.runner_evo_mixed_payoffs_input import EvoMixedPayoffInputRunner
+from pax.runners.experimental.runner_evo_mixed_payoffs_gen import EvoMixedPayoffGenRunner
+from pax.runners.experimental.runner_evo_mixed_payoffs_pred import EvoMixedPayoffPredRunner
+from pax.runners.experimental.runner_evo_mixed_payoffs_only_opp import EvoMixedPayoffOnlyOppRunner
 from pax.runners.runner_evo_scanned import EvoScannedRunner
 
 from pax.envs.iterated_tensor_game_n_player import IteratedTensorGameNPlayer

pax/runners/experimental/runner_evo_mixed_IPD_payoffs.py

@@ -37,6 +37,8 @@ class EvoMixedIPDPayoffRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+Each opponent has a different payoff matrix that follows the IPD conditions, but each member
+of the evo population plays against the same payoff matrices to ensure a fair comparison.
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is
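
For orientation, a minimal sketch of what this docstring describes; it is not the repository's code, and sample_ipd_payoffs, num_opps, and popsize are illustrative names. The IPD conditions are the standard T > R > P > S and 2R > T + S, and the tiling at the end gives every member of the evo population the same batch of matrices.

import jax
import jax.numpy as jnp

def sample_ipd_payoffs(rng, num_opps, popsize):
    # Sample one IPD-conditioned payoff matrix per opponent.
    k_p, k_r, k_s, k_t = jax.random.split(rng, 4)
    p = jax.random.uniform(k_p, (num_opps,))                  # P (punishment)
    r = p + jax.random.uniform(k_r, (num_opps,), minval=0.1)  # R > P
    s = p - jax.random.uniform(k_s, (num_opps,), minval=0.1)  # S < P
    # Draw T strictly inside (R, 2R - S), so T > R and 2R > T + S both hold.
    u = jax.random.uniform(k_t, (num_opps,), minval=0.01, maxval=1.0)
    t = r + u * (r - s)
    # One (4, 2) matrix per opponent: rows CC, CD, DC, DD; columns are the two players.
    payoffs = jnp.stack(
        [jnp.stack([r, r], -1), jnp.stack([s, t], -1),
         jnp.stack([t, s], -1), jnp.stack([p, p], -1)],
        axis=1,
    )  # (num_opps, 4, 2)
    # Same matrices for every population member: (popsize, num_opps, 4, 2).
    return jnp.tile(payoffs[None], (popsize, 1, 1, 1))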

pax/runners/experimental/runner_evo_mixed_lr.py

@@ -37,6 +37,9 @@ class EvoMixedLRRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+Each opponent has a different learning rate, but the members of the population
+play against the same learning rates to ensure a fair comparison.
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is
@@ -212,7 +215,7 @@ def _inner_rollout(carry, unused):
 obs2,
 a2_mem,
 )
-jax.debug.print("env_params: {x}", x=env_params)
+# jax.debug.print("env_params: {x}", x=env_params)
 (next_obs1, next_obs2), env_state, rewards, done, info = env.step(
 env_rng,
 env_state,
@@ -338,10 +341,10 @@ def _rollout(
 a2_rng,
 agent2._mem.hidden,
 )
-# generate an array of shape [10]
-random_numbers = jax.random.uniform(_rng_run, minval=1.0, maxval=1.0, shape=(10,))
-# # repeat the array 1000 times along the first dimension
-learning_rates = jnp.tile(random_numbers, (1000, 1))
+# generate an array of shape [args.num_opps]
+random_numbers = jax.random.uniform(_rng_run, minval=1e-5, maxval=1.0, shape=(args.num_opps,))
+# repeat the array popsize times along the first dimension
+learning_rates = jnp.tile(random_numbers, (args.popsize, 1))
 a2_state.opt_state[2].hyperparams['step_size'] = learning_rates
 # jax.debug.breakpoint()
 
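
The last hunk replaces a degenerate uniform range (minval=1.0, maxval=1.0) and hard-coded shapes (10 opponents, a population of 1000) with a real [1e-5, 1.0) range and args-driven shapes. A self-contained sketch of the same pattern follows; it assumes an optimizer built with optax.inject_hyperparams (the runner's 'step_size' key suggests a similar wrapper), and the function name is illustrative.

import jax
import jax.numpy as jnp
import optax

def sample_learning_rates(rng, num_opps, popsize):
    # One learning rate per opponent, uniform in [1e-5, 1.0).
    rates = jax.random.uniform(rng, (num_opps,), minval=1e-5, maxval=1.0)
    # Repeat popsize times so every population member meets the same rates.
    return jnp.tile(rates, (popsize, 1))  # (popsize, num_opps)

# inject_hyperparams keeps the learning rate in the optimizer state, which is
# what makes the runner-style in-place overwrite possible.
opt = optax.inject_hyperparams(optax.sgd)(learning_rate=1e-2)
opt_state = opt.init({"w": jnp.zeros(3)})
lrs = sample_learning_rates(jax.random.PRNGKey(0), num_opps=10, popsize=1000)
opt_state.hyperparams["learning_rate"] = lrs  # analogous to hyperparams['step_size']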

pax/runners/experimental/runner_evo_mixed_payoffs.py

@@ -37,6 +37,7 @@ class EvoMixedPayoffRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+The payoff matrix is randomly sampled at each rollout; each opponent has a different payoff matrix.
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is

pax/runners/experimental/runner_evo_mixed_payoffs_gen.py

@@ -37,6 +37,7 @@ class EvoMixedPayoffGenRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+The payoff matrix is randomly sampled at each rollout; each opponent has the same payoff matrix.
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is
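
The Gen variant differs from EvoMixedPayoffRunner above only in the sampling axis: one matrix per opponent there, one shared matrix here. A hedged sketch of both behaviors behind a single flag; the [0, 5) payoff range and the function name are assumptions rather than values from the commit.

import jax
import jax.numpy as jnp

def sample_payoff_matrices(rng, num_opps, shared):
    # (n, 4, 2): joint actions CC/CD/DC/DD, payoff for each of the two players.
    n = 1 if shared else num_opps
    payoffs = jax.random.uniform(rng, (n, 4, 2), minval=0.0, maxval=5.0)
    if shared:
        # Gen behavior: tile one draw so every opponent gets the same matrix.
        payoffs = jnp.tile(payoffs, (num_opps, 1, 1))
    return payoffs  # (num_opps, 4, 2)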

pax/runners/experimental/runner_evo_mixed_payoffs_input.py

@@ -37,6 +37,8 @@ class EvoMixedPayoffInputRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+Adds payoff matrices as input to the agents so they don't have to figure them out on the fly.
+Either randomly sample and set a payoff matrix
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is
@@ -201,8 +203,8 @@ def _inner_rollout(carry, unused):
 # a1_rng = rngs[:, :, :, 1, :]
 # a2_rng = rngs[:, :, :, 2, :]
 rngs = rngs[:, :, :, 3, :]
-print("OBS1 shape: ", obs1.shape)
-print("env params shape: ", env_params.payoff_matrix.shape)
+# print("OBS1 shape: ", obs1.shape)
+# print("env params shape: ", env_params.payoff_matrix.shape)
 # flatten the payoff matrix and append it to the observations
 # the observations have shape (500, 10, 2, 5) and the payoff matrix has shape (10, 4, 2)
 # we want to append the payoff matrix to the observations so that the observations have shape (500, 10, 2, 5+8)
@@ -290,7 +292,7 @@ def _outer_rollout(carry, unused):
 # MFOS has to take a meta-action for each episode
 if args.agent1 == "MFOS":
 a1_mem = agent1.meta_policy(a1_mem)
-print("OBS2 shape: ", obs2.shape)
+# print("OBS2 shape: ", obs2.shape)
 # payoff_matrix = env_params.payoff_matrix.reshape((10, 8))
 # payoff_matrix = jnp.tile(jnp.expand_dims(jnp.tile(payoff_matrix, (500, 1, 1)), 2), (1, 1, 2, 1))
 # obs2_update = jnp.concatenate((obs2, payoff_matrix), axis=3)
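
The commented-out lines above hard-code the shapes; a runnable generalization might look like the following, where append_payoff is an illustrative name and the example shapes match the (500, 10, 2, 5) observations and (10, 4, 2) payoff matrix from the comments.

import jax.numpy as jnp

def append_payoff(obs, payoff_matrix):
    # obs: (num_envs, num_opps, num_players, obs_dim), e.g. (500, 10, 2, 5)
    # payoff_matrix: (num_opps, 4, 2), i.e. 8 payoff entries per opponent
    num_envs, num_opps, num_players, _ = obs.shape
    flat = payoff_matrix.reshape((num_opps, -1))                   # (10, 8)
    flat = jnp.tile(flat, (num_envs, 1, 1))                        # (500, 10, 8)
    flat = jnp.tile(flat[:, :, None, :], (1, 1, num_players, 1))   # (500, 10, 2, 8)
    return jnp.concatenate((obs, flat), axis=3)                    # (500, 10, 2, 13)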

pax/runners/experimental/runner_evo_mixed_payoffs_only_opp.py

@@ -37,6 +37,8 @@ class EvoMixedPayoffOnlyOppRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+The opponent plays a noisy version of the original IPD payoff matrix;
+the same noise is applied to all opponents.
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is
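
A minimal sketch of that description, assuming the canonical IPD values (R, S, T, P) = (3, 0, 5, 1) and an arbitrary noise scale, neither of which is specified in the commit: a single noise draw perturbs the matrix, and the same perturbed matrix is tiled across all opponents.

import jax
import jax.numpy as jnp

# Canonical IPD payoffs per joint action (CC, CD, DC, DD) for the two players.
IPD_PAYOFF_MATRIX = jnp.array(
    [[3.0, 3.0], [0.0, 5.0], [5.0, 0.0], [1.0, 1.0]]
)

def noisy_ipd_payoffs(rng, num_opps, scale=0.5):
    # One shared draw: all opponents play the same noisy matrix.
    noise = jax.random.uniform(
        rng, IPD_PAYOFF_MATRIX.shape, minval=-scale, maxval=scale
    )
    return jnp.tile((IPD_PAYOFF_MATRIX + noise)[None], (num_opps, 1, 1))  # (num_opps, 4, 2)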