
Commit

cleanup
chrismatix committed Sep 27, 2023
1 parent 55dd914 commit 57899fd
Showing 18 changed files with 487 additions and 2,089 deletions.
20 changes: 12 additions & 8 deletions docs/envs.md
@@ -1,13 +1,17 @@
## Environments

Pax includes many environments, specified by `env_id`; examples are `infinite_matrix_game`, `iterated_matrix_game`, and `coin_game`. Independently, you can specify the environment type via `env_type`: the supported options are `sequential` and `meta` (a meta environment with an inner/outer loop).

These are specified in the experiment config files under `pax/conf/experiment/{env_id}/EXPERIMENT.yaml`; a minimal environment-selection sketch follows the table below.

| Environment ID | Environment Type | Description |
|------------------------|---------------------|-------------|
| `iterated_matrix_game` | `sequential` | An iterated matrix game with a predetermined number of timesteps per episode and a discount factor $\gamma$ |
| `iterated_matrix_game` | `meta` | A meta game over the iterated matrix game with an outer agent (player 1) and an inner agent (player 2). The inner agent updates every episode, while the outer agent updates every meta-episode |
| `infinite_matrix_game` | `meta` | An infinite matrix game that calculates exact returns given a payoff matrix and discount factor $\gamma$ |
| `coin_game` | `sequential` | A sequential series of episodes of the coin game between two players. Each player updates at the end of an episode |
| `coin_game` | `meta` | A meta-learning version of the coin game with an outer agent (player 1) and an inner agent (player 2). The inner agent updates every episode, while the outer agent updates every meta-episode |
| `cournot` | `sequential`/`meta` | A one-shot version of a [Cournot competition](https://en.wikipedia.org/wiki/Cournot_competition) |
| `fishery` | `sequential`/`meta` | A dynamic resource-harvesting game as specified in Perman et al. |
| Rice-N | `sequential`/`meta` | A re-implementation of the Integrated Assessment Model introduced by [Zhang et al.](https://papers.ssrn.com/abstract=4189735), available with either the original 27 regions or a new calibration of only 5 regions |
| C-Rice-N | `sequential`/`meta` | An extension of Rice-N with a simple climate club mechanism |
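
As a minimal sketch (not a complete experiment file), the keys below show how an experiment config selects one of these environments; the values are taken from the `c_rice` configs added in this commit, and everything else (agents, training, ES, and logging settings) is configured separately.

```yaml
# Minimal environment-selection sketch, using values from pax/conf/experiment/c_rice/debug.yaml
env_id: C-Rice-v1                        # which environment to instantiate
env_type: meta                           # 'sequential' or 'meta' (inner/outer loop)
num_players: 6                           # number of players in the environment
has_mediator: True                       # these c_rice experiments include a mediator agent
config_folder: pax/envs/Rice/5_regions   # region calibration data for the Rice/C-Rice environment
```

The remaining keys in the full configs below (agent choices, PPO hyperparameters, ES settings, wandb logging) are independent of the environment selection.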
81 changes: 81 additions & 0 deletions pax/conf/experiment/c_rice/debug.yaml
@@ -0,0 +1,81 @@
# @package _global_

# Agents
agent1: 'PPO'
agent_default: 'PPO'

# Environment
env_id: C-Rice-v1
env_type: meta
num_players: 6
has_mediator: True
config_folder: pax/envs/Rice/5_regions
runner: tensor_evo

# Training
top_k: 5
popsize: 1000
num_envs: 1
num_opps: 1
num_outer_steps: 2
num_inner_steps: 20
num_iters: 1
num_devices: 1
num_steps: 4


# PPO agent parameters
ppo_default:
  num_minibatches: 4
  num_epochs: 4
  gamma: 1.0
  gae_lambda: 0.95
  ppo_clipping_epsilon: 0.2
  value_coeff: 0.5
  clip_value: True
  max_gradient_norm: 0.5
  anneal_entropy: False
  entropy_coeff_start: 0.0
  entropy_coeff_horizon: 10000000
  entropy_coeff_end: 0.0
  lr_scheduling: True
  learning_rate: 1e-4
  adam_epsilon: 1e-5
  with_memory: True
  with_cnn: False
  output_channels: 16
  kernel_shape: [3, 3]
  separate: True
  hidden_size: 32

# ES parameters
es:
  algo: OpenES              # [OpenES, CMA_ES]
  sigma_init: 0.04          # Initial scale of isotropic Gaussian noise
  sigma_decay: 0.999        # Multiplicative decay factor
  sigma_limit: 0.01         # Smallest possible scale
  init_min: 0.0             # Range of parameter mean initialization - Min
  init_max: 0.0             # Range of parameter mean initialization - Max
  clip_min: -1e10           # Range of parameter proposals - Min
  clip_max: 1e10            # Range of parameter proposals - Max
  lrate_init: 0.01          # Initial learning rate
  lrate_decay: 0.9999       # Multiplicative decay factor
  lrate_limit: 0.001        # Smallest possible lrate
  beta_1: 0.99              # Adam - beta_1
  beta_2: 0.999             # Adam - beta_2
  eps: 1e-8                 # eps constant
  centered_rank: False      # Fitness centered_rank
  w_decay: 0                # Decay old elite fitness
  maximise: True            # Maximise fitness
  z_score: False            # Normalise fitness
  mean_reduce: True         # Remove mean

# Logging setup
wandb:
  project: c-Rice
  group: 'mediator'
  mode: 'offline'
  name: 'c-Rice-mediator-GS-${agent_default}-seed-${seed}'
  log: False
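
A side note on the run-name template above (the same pattern appears in the other configs below): `${agent_default}` and `${seed}` are OmegaConf-style interpolations, and the `# @package _global_` header marks these files as Hydra experiment configs, so the name is resolved when the run is launched. A hypothetical resolved value, assuming `agent_default: 'PPO'` as above and a seed of 0 supplied elsewhere in the config:

```yaml
# Hypothetical resolution of the name template (assumes seed: 0 is set elsewhere)
wandb:
  name: 'c-Rice-mediator-GS-PPO-seed-0'
```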


54 changes: 54 additions & 0 deletions pax/conf/experiment/c_rice/marl_baseline.yaml
@@ -0,0 +1,54 @@
# @package _global_

# Agents
agent_default: 'PPO'

# Environment
env_id: C-Rice-v1
env_type: meta
num_players: 6
has_mediator: True
config_folder: pax/envs/Rice/5_regions
runner: tensor_evo

# Training
top_k: 5
popsize: 1000
num_envs: 2
num_opps: 1
num_outer_steps: 1
num_inner_steps: 20
num_iters: 2000
num_devices: 1
num_steps: 200

# PPO agent parameters
ppo_default:
  num_minibatches: 4
  num_epochs: 4
  gamma: 1.0
  gae_lambda: 0.95
  ppo_clipping_epsilon: 0.2
  value_coeff: 0.5
  clip_value: True
  max_gradient_norm: 0.5
  anneal_entropy: False
  entropy_coeff_start: 0.0
  entropy_coeff_horizon: 10000000
  entropy_coeff_end: 0.0
  lr_scheduling: True
  learning_rate: 1e-4
  adam_epsilon: 1e-5
  with_memory: True
  with_cnn: False
  output_channels: 16
  kernel_shape: [3, 3]
  separate: True
  hidden_size: 64

# Logging setup
wandb:
  project: c-Rice
  group: 'mediator'
  name: 'c-Rice-MARL-${agent_default}-seed-${seed}'
  log: True
90 changes: 90 additions & 0 deletions pax/conf/experiment/c_rice/mediator_gs_naive.yaml
@@ -0,0 +1,90 @@
# @package _global_

# Agents
agent1: 'PPO'
agent_default: 'Naive'

# Environment
env_id: C-Rice-v1
env_type: meta
num_players: 6
has_mediator: True
config_folder: pax/envs/Rice/5_regions
runner: tensor_evo

# Training
top_k: 5
popsize: 1000
num_envs: 1
num_opps: 1
num_outer_steps: 1
num_inner_steps: 20
num_iters: 3500
num_devices: 1
num_steps: 4


# PPO agent parameters
ppo_default:
  num_minibatches: 4
  num_epochs: 4
  gamma: 1.0
  gae_lambda: 0.95
  ppo_clipping_epsilon: 0.2
  value_coeff: 0.5
  clip_value: True
  max_gradient_norm: 0.5
  anneal_entropy: False
  entropy_coeff_start: 0.0
  entropy_coeff_horizon: 10000000
  entropy_coeff_end: 0.0
  lr_scheduling: True
  learning_rate: 1e-4
  adam_epsilon: 1e-5
  with_memory: True
  with_cnn: False
  output_channels: 16
  kernel_shape: [3, 3]
  separate: True
  hidden_size: 64

naive:
  num_minibatches: 1
  num_epochs: 1
  gamma: 1
  gae_lambda: 0.95
  max_gradient_norm: 1.0
  learning_rate: 1.0
  adam_epsilon: 1e-5
  entropy_coeff: 0.0

# ES parameters
es:
  algo: OpenES              # [OpenES, CMA_ES]
  sigma_init: 0.04          # Initial scale of isotropic Gaussian noise
  sigma_decay: 0.999        # Multiplicative decay factor
  sigma_limit: 0.01         # Smallest possible scale
  init_min: 0.0             # Range of parameter mean initialization - Min
  init_max: 0.0             # Range of parameter mean initialization - Max
  clip_min: -1e10           # Range of parameter proposals - Min
  clip_max: 1e10            # Range of parameter proposals - Max
  lrate_init: 0.01          # Initial learning rate
  lrate_decay: 0.9999       # Multiplicative decay factor
  lrate_limit: 0.001        # Smallest possible lrate
  beta_1: 0.99              # Adam - beta_1
  beta_2: 0.999             # Adam - beta_2
  eps: 1e-8                 # eps constant
  centered_rank: False      # Fitness centered_rank
  w_decay: 0                # Decay old elite fitness
  maximise: True            # Maximise fitness
  z_score: False            # Normalise fitness
  mean_reduce: True         # Remove mean

# Logging setup
wandb:
  project: c-Rice
  group: 'mediator'
  name: 'c-Rice-mediator-GS-${agent_default}-seed-${seed}'
  log: True


80 changes: 80 additions & 0 deletions pax/conf/experiment/c_rice/mediator_gs_ppo.yaml
@@ -0,0 +1,80 @@
# @package _global_

# Agents
agent1: 'PPO'
agent_default: 'PPO'

# Environment
env_id: C-Rice-v1
env_type: meta
num_players: 6
has_mediator: True
config_folder: pax/envs/Rice/5_regions
runner: tensor_evo

# Training
top_k: 5
popsize: 1000
num_envs: 1
num_opps: 1
num_outer_steps: 200
num_inner_steps: 20
num_iters: 3500
num_devices: 1
num_steps: 200


# PPO agent parameters
ppo_default:
  num_minibatches: 4
  num_epochs: 4
  gamma: 1.0
  gae_lambda: 0.95
  ppo_clipping_epsilon: 0.2
  value_coeff: 0.5
  clip_value: True
  max_gradient_norm: 0.5
  anneal_entropy: False
  entropy_coeff_start: 0.0
  entropy_coeff_horizon: 10000000
  entropy_coeff_end: 0.0
  lr_scheduling: True
  learning_rate: 1e-4
  adam_epsilon: 1e-5
  with_memory: True
  with_cnn: False
  output_channels: 16
  kernel_shape: [3, 3]
  separate: True
  hidden_size: 64

# ES parameters
es:
  algo: OpenES              # [OpenES, CMA_ES]
  sigma_init: 0.04          # Initial scale of isotropic Gaussian noise
  sigma_decay: 0.999        # Multiplicative decay factor
  sigma_limit: 0.01         # Smallest possible scale
  init_min: 0.0             # Range of parameter mean initialization - Min
  init_max: 0.0             # Range of parameter mean initialization - Max
  clip_min: -1e10           # Range of parameter proposals - Min
  clip_max: 1e10            # Range of parameter proposals - Max
  lrate_init: 0.01          # Initial learning rate
  lrate_decay: 0.9999       # Multiplicative decay factor
  lrate_limit: 0.001        # Smallest possible lrate
  beta_1: 0.99              # Adam - beta_1
  beta_2: 0.999             # Adam - beta_2
  eps: 1e-8                 # eps constant
  centered_rank: False      # Fitness centered_rank
  w_decay: 0                # Decay old elite fitness
  maximise: True            # Maximise fitness
  z_score: False            # Normalise fitness
  mean_reduce: True         # Remove mean

# Logging setup
wandb:
  project: c-Rice
  group: 'mediator'
  name: 'c-Rice-mediator-GS-${agent_default}-seed-${seed}'
  log: True

