Commit dd6555b

restructuring and deleting some runners
Aidandos committed Oct 24, 2023
1 parent 97c97e5 commit dd6555b
Showing 10 changed files with 31 additions and 1,321 deletions.

.gitignore (5 additions, 0 deletions)
@@ -114,3 +114,8 @@ experiment.log
 
 # Pax
 pax/version.py
+
+*.gif
+*.json
+*.png
+*.sh

pax/experiment.py (7 additions, 7 deletions)
@@ -65,13 +65,13 @@
 from pax.runners.runner_evo import EvoRunner
 from pax.runners.runner_evo_multishaper import MultishaperEvoRunner
 from pax.runners.runner_evo_hardstop import EvoHardstopRunner
-from pax.runners.runner_evo_mixed_lr import EvoMixedLRRunner
-from pax.runners.runner_evo_mixed_payoffs import EvoMixedPayoffRunner
-from pax.runners.runner_evo_mixed_IPD_payoffs import EvoMixedIPDPayoffRunner
-from pax.runners.runner_evo_mixed_payoffs_input import EvoMixedPayoffInputRunner
-from pax.runners.runner_evo_mixed_payoffs_gen import EvoMixedPayoffGenRunner
-from pax.runners.runner_evo_mixed_payoffs_pred import EvoMixedPayoffPredRunner
-from pax.runners.runner_evo_mixed_payoffs_only_opp import EvoMixedPayoffOnlyOppRunner
+from pax.runners.experimental.runner_evo_mixed_lr import EvoMixedLRRunner
+from pax.runners.experimental.runner_evo_mixed_payoffs import EvoMixedPayoffRunner
+from pax.runners.experimental.runner_evo_mixed_IPD_payoffs import EvoMixedIPDPayoffRunner
+from pax.runners.experimental.runner_evo_mixed_payoffs_input import EvoMixedPayoffInputRunner
+from pax.runners.experimental.runner_evo_mixed_payoffs_gen import EvoMixedPayoffGenRunner
+from pax.runners.experimental.runner_evo_mixed_payoffs_pred import EvoMixedPayoffPredRunner
+from pax.runners.experimental.runner_evo_mixed_payoffs_only_opp import EvoMixedPayoffOnlyOppRunner
 from pax.runners.runner_evo_scanned import EvoScannedRunner
 
 from pax.envs.iterated_tensor_game_n_player import IteratedTensorGameNPlayer

pax/runners/experimental/runner_evo_mixed_IPD_payoffs.py

@@ -37,6 +37,8 @@ class EvoMixedIPDPayoffRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+Each opponent has a different payoff matrix that follows the IPD conditions, but each member
+of the evo population plays against the same payoff matrices to ensure a fair comparison.
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is
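
For orientation, a minimal sketch of what this docstring describes; it is not the repository's code, and sample_ipd_payoffs, num_opps, and popsize are illustrative names. The IPD conditions are the standard T > R > P > S and 2R > T + S, and the tiling at the end gives every member of the evo population the same batch of matrices.

import jax
import jax.numpy as jnp

def sample_ipd_payoffs(rng, num_opps, popsize):
    # Sample one IPD-conditioned payoff matrix per opponent.
    k_p, k_r, k_s, k_t = jax.random.split(rng, 4)
    p = jax.random.uniform(k_p, (num_opps,))                  # P (punishment)
    r = p + jax.random.uniform(k_r, (num_opps,), minval=0.1)  # R > P
    s = p - jax.random.uniform(k_s, (num_opps,), minval=0.1)  # S < P
    # Draw T strictly inside (R, 2R - S), so T > R and 2R > T + S both hold.
    u = jax.random.uniform(k_t, (num_opps,), minval=0.01, maxval=1.0)
    t = r + u * (r - s)
    # One (4, 2) matrix per opponent: rows CC, CD, DC, DD; columns are the two players.
    payoffs = jnp.stack(
        [jnp.stack([r, r], -1), jnp.stack([s, t], -1),
         jnp.stack([t, s], -1), jnp.stack([p, p], -1)],
        axis=1,
    )  # (num_opps, 4, 2)
    # Same matrices for every population member: (popsize, num_opps, 4, 2).
    return jnp.tile(payoffs[None], (popsize, 1, 1, 1))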

pax/runners/experimental/runner_evo_mixed_lr.py

@@ -37,6 +37,9 @@ class EvoMixedLRRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+Each opponent has a different learning rate, but the members of the population
+play against the same learning rates to ensure a fair comparison.
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is
@@ -212,7 +215,7 @@ def _inner_rollout(carry, unused):
 obs2,
 a2_mem,
 )
-jax.debug.print("env_params: {x}", x=env_params)
+# jax.debug.print("env_params: {x}", x=env_params)
 (next_obs1, next_obs2), env_state, rewards, done, info = env.step(
 env_rng,
 env_state,
@@ -338,10 +341,10 @@ def _rollout(
 a2_rng,
 agent2._mem.hidden,
 )
-# generate an array of shape [10]
-random_numbers = jax.random.uniform(_rng_run, minval=1.0, maxval=1.0, shape=(10,))
-# # repeat the array 1000 times along the first dimension
-learning_rates = jnp.tile(random_numbers, (1000, 1))
+# generate an array of shape [args.num_opps]
+random_numbers = jax.random.uniform(_rng_run, minval=1e-5, maxval=1.0, shape=(args.num_opps,))
+# repeat the array popsize times along the first dimension
+learning_rates = jnp.tile(random_numbers, (args.popsize, 1))
 a2_state.opt_state[2].hyperparams['step_size'] = learning_rates
 # jax.debug.breakpoint()
 
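
The last hunk replaces a degenerate uniform range (minval=1.0, maxval=1.0) and hard-coded shapes (10 opponents, a population of 1000) with a real [1e-5, 1.0) range and args-driven shapes. A self-contained sketch of the same pattern follows; it assumes an optimizer built with optax.inject_hyperparams (the runner's 'step_size' key suggests a similar wrapper), and the function name is illustrative.

import jax
import jax.numpy as jnp
import optax

def sample_learning_rates(rng, num_opps, popsize):
    # One learning rate per opponent, uniform in [1e-5, 1.0).
    rates = jax.random.uniform(rng, (num_opps,), minval=1e-5, maxval=1.0)
    # Repeat popsize times so every population member meets the same rates.
    return jnp.tile(rates, (popsize, 1))  # (popsize, num_opps)

# inject_hyperparams keeps the learning rate in the optimizer state, which is
# what makes the runner-style in-place overwrite possible.
opt = optax.inject_hyperparams(optax.sgd)(learning_rate=1e-2)
opt_state = opt.init({"w": jnp.zeros(3)})
lrs = sample_learning_rates(jax.random.PRNGKey(0), num_opps=10, popsize=1000)
opt_state.hyperparams["learning_rate"] = lrs  # analogous to hyperparams['step_size']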

pax/runners/experimental/runner_evo_mixed_payoffs.py

@@ -37,6 +37,7 @@ class EvoMixedPayoffRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+The payoff matrix is randomly sampled at each rollout; each opponent has a different payoff matrix.
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is

pax/runners/experimental/runner_evo_mixed_payoffs_gen.py

@@ -37,6 +37,7 @@ class EvoMixedPayoffGenRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+The payoff matrix is randomly sampled at each rollout; each opponent has the same payoff matrix.
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is
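
The Gen variant differs from EvoMixedPayoffRunner above only in the sampling axis: one matrix per opponent there, one shared matrix here. A hedged sketch of both behaviors behind a single flag; the [0, 5) payoff range and the function name are assumptions rather than values from the commit.

import jax
import jax.numpy as jnp

def sample_payoff_matrices(rng, num_opps, shared):
    # (n, 4, 2): joint actions CC/CD/DC/DD, payoff for each of the two players.
    n = 1 if shared else num_opps
    payoffs = jax.random.uniform(rng, (n, 4, 2), minval=0.0, maxval=5.0)
    if shared:
        # Gen behavior: tile one draw so every opponent gets the same matrix.
        payoffs = jnp.tile(payoffs, (num_opps, 1, 1))
    return payoffs  # (num_opps, 4, 2)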

pax/runners/experimental/runner_evo_mixed_payoffs_input.py

@@ -37,6 +37,8 @@ class EvoMixedPayoffInputRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+Adds payoff matrices as input to the agents so they don't have to figure them out on the fly.
+Either randomly sample and set a payoff matrix
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is
@@ -201,8 +203,8 @@ def _inner_rollout(carry, unused):
 # a1_rng = rngs[:, :, :, 1, :]
 # a2_rng = rngs[:, :, :, 2, :]
 rngs = rngs[:, :, :, 3, :]
-print("OBS1 shape: ", obs1.shape)
-print("env params shape: ", env_params.payoff_matrix.shape)
+# print("OBS1 shape: ", obs1.shape)
+# print("env params shape: ", env_params.payoff_matrix.shape)
 # flatten the payoff matrix and append it to the observations
 # the observations have shape (500, 10, 2, 5) and the payoff matrix has shape (10, 4, 2)
 # we want to append the payoff matrix to the observations so that the observations have shape (500, 10, 2, 5+8)
@@ -290,7 +292,7 @@ def _outer_rollout(carry, unused):
 # MFOS has to take a meta-action for each episode
 if args.agent1 == "MFOS":
 a1_mem = agent1.meta_policy(a1_mem)
-print("OBS2 shape: ", obs2.shape)
+# print("OBS2 shape: ", obs2.shape)
 # payoff_matrix = env_params.payoff_matrix.reshape((10, 8))
 # payoff_matrix = jnp.tile(jnp.expand_dims(jnp.tile(payoff_matrix, (500, 1, 1)), 2), (1, 1, 2, 1))
 # obs2_update = jnp.concatenate((obs2, payoff_matrix), axis=3)
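
The commented-out lines above hard-code the shapes; a runnable generalization might look like the following, where append_payoff is an illustrative name and the example shapes match the (500, 10, 2, 5) observations and (10, 4, 2) payoff matrix from the comments.

import jax.numpy as jnp

def append_payoff(obs, payoff_matrix):
    # obs: (num_envs, num_opps, num_players, obs_dim), e.g. (500, 10, 2, 5)
    # payoff_matrix: (num_opps, 4, 2), i.e. 8 payoff entries per opponent
    num_envs, num_opps, num_players, _ = obs.shape
    flat = payoff_matrix.reshape((num_opps, -1))                   # (10, 8)
    flat = jnp.tile(flat, (num_envs, 1, 1))                        # (500, 10, 8)
    flat = jnp.tile(flat[:, :, None, :], (1, 1, num_players, 1))   # (500, 10, 2, 8)
    return jnp.concatenate((obs, flat), axis=3)                    # (500, 10, 2, 13)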

pax/runners/experimental/runner_evo_mixed_payoffs_only_opp.py

@@ -37,6 +37,8 @@ class EvoMixedPayoffOnlyOppRunner:
 It composes together agents, watchers, and the environment.
 Within the init, we declare vmaps and pmaps for training.
 The environment provided must conform to a meta-environment.
+The opponent plays a noisy version of the original IPD payoff matrix;
+the same noise is applied to all opponents.
 Args:
 agents (Tuple[agents]):
 The set of agents that will run in the experiment. Note, ordering is
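
A minimal sketch of that description, assuming the canonical IPD values (R, S, T, P) = (3, 0, 5, 1) and an arbitrary noise scale, neither of which is specified in the commit: a single noise draw perturbs the matrix, and the same perturbed matrix is tiled across all opponents.

import jax
import jax.numpy as jnp

# Canonical IPD payoffs per joint action (CC, CD, DC, DD) for the two players.
IPD_PAYOFF_MATRIX = jnp.array(
    [[3.0, 3.0], [0.0, 5.0], [5.0, 0.0], [1.0, 1.0]]
)

def noisy_ipd_payoffs(rng, num_opps, scale=0.5):
    # One shared draw: all opponents play the same noisy matrix.
    noise = jax.random.uniform(
        rng, IPD_PAYOFF_MATRIX.shape, minval=-scale, maxval=scale
    )
    return jnp.tile((IPD_PAYOFF_MATRIX + noise)[None], (num_opps, 1, 1))  # (num_opps, 4, 2)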