From 49d335c01b39a3c1f0a9143c2d4a4eb2ef2f767b Mon Sep 17 00:00:00 2001
From: MC <114601812+mchoilab@users.noreply.github.com>
Date: Tue, 9 Jul 2024 19:12:26 +0200
Subject: [PATCH] [Docs] WandB Sweeps example with BenchMARL (#105)

* Adding a test file to new branch

* Update README and add sweep configuration file

- Updated README.md with instructions on using W&B sweeps with Hydra
- Added sweepconfig.yaml for defining hyperparameter sweeps

This commit includes detailed steps for setting up and running W&B sweeps
with BenchMARL and Hydra, along with a sample configuration file.

* update readme

* update README and config

* Update examples/sweep/wandb/sweepconfig.yaml

* pre-commit

---------

Co-authored-by: M. Choi
Co-authored-by: Matteo Bettini <55539777+matteobettini@users.noreply.github.com>
---
 examples/sweep/wandb/readme.md        | 81 +++++++++++++++++++++++++++
 examples/sweep/wandb/sweepconfig.yaml | 56 ++++++++++++++++++
 2 files changed, 137 insertions(+)
 create mode 100644 examples/sweep/wandb/readme.md
 create mode 100644 examples/sweep/wandb/sweepconfig.yaml

diff --git a/examples/sweep/wandb/readme.md b/examples/sweep/wandb/readme.md
new file mode 100644
index 00000000..c449e1e8
--- /dev/null
+++ b/examples/sweep/wandb/readme.md
@@ -0,0 +1,81 @@
# Using Weights & Biases (W&B) Sweeps with BenchMARL

Hyperparameter tuning can significantly improve the performance of your RL agents. With BenchMARL and Hydra, it is easy to train multiple models with different hyperparameters using a W&B hyperparameter sweep: modify `sweepconfig.yaml` to define your sweep configuration and run it from the command line.

## Prerequisites

- Ensure you have Weights & Biases installed on top of the BenchMARL requirements: `pip install wandb`.

- Update `benchmarl/conf/config.yaml` with your desired experiment setup, e.g.:

```yaml
defaults:
  - experiment: base_experiment
  - algorithm: ippo
  - task: customenv/task_1
  - model: layers/mlp
  - model@critic_model: layers/mlp
  - _self_

seed: 0
```

## Step 1: Define Your Sweep Configuration

First, create or modify the `sweepconfig.yaml` file. Check the [W&B Sweep Configuration Documentation](https://docs.wandb.ai/guides/sweeps/sweep-config-keys) for detailed configuration options.

The YAML file already contains the basic elements required to work with BenchMARL; change the values according to your desired experiment setup. Note that, because you are using Hydra, the parameters in the YAML file must use dot notation (e.g., `experiment.lr`) rather than the standard nested configuration ([see this community discussion](https://community.wandb.ai/t/nested-sweep-configuration/3369)).
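Hydra receives each swept value as a command-line override: the sweep agent launches your training script with bare `key=value` arguments (via the `${args_no_hyphens}` macro in the `command` section of `sweepconfig.yaml`). As a rough sketch with illustrative values, a run that samples a learning rate of `0.001` is launched like:

```bash
# Hypothetical invocation generated by the sweep agent (values are examples)
python benchmarl/run.py experiment.lr=0.001 experiment.max_n_iters=321
```

With dotted keys, the core of the sweep configuration looks like this: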
```yaml
entity: "ENTITY_NAME"

# options: bayes, random, grid
method: bayes

metric:
  name: eval/agent/reward/episode_reward_mean
  goal: maximize

parameters:
  experiment.lr:
    max: 0.003
    min: 0.000025
    # distribution: uniform

  experiment.max_n_iters:
    value: 321
```

## Step 2: Initialize the Sweep

To run the sweep, initialize it using the following command in your terminal:

```bash
wandb sweep sweepconfig.yaml
```

W&B will automatically create the sweep and print its ID along with the command to launch an agent, e.g.:

```bash
wandb: Created sweep with ID: xyz123
wandb: View sweep at: https://wandb.ai/your_entity/your_project/sweeps/xyz123
wandb: Run sweep agent with: wandb agent your_entity/your_project/xyz123
```

## Step 3: Start Sweep Agents

Run the command printed in the terminal to start a sweep agent:

```bash
wandb agent your_entity/your_project/xyz123
```

This will start the agent and begin running experiments according to your sweep configuration.

## References

- https://wandb.ai/adrishd/hydra-example/reports/Configuring-W-B-Projects-with-Hydra--VmlldzoxNTA2MzQw
- https://docs.wandb.ai/guides/sweeps

diff --git a/examples/sweep/wandb/sweepconfig.yaml b/examples/sweep/wandb/sweepconfig.yaml
new file mode 100644
index 00000000..7daeaacd
--- /dev/null
+++ b/examples/sweep/wandb/sweepconfig.yaml
@@ -0,0 +1,56 @@
program: PATH_TO_YOUR_DIRECTORY/benchmarl/run.py
project: "YOUR_PROJECT_NAME"
entity: "YOUR_ENTITY_NAME"

method: bayes

metric:
  name: eval/agent/reward/episode_reward_mean
  goal: maximize

parameters:

  # experiment hyperparameters
  experiment.lr:
    max: 0.003
    min: 0.000025
    # distribution: uniform

  experiment.max_n_iters:
    value: 321
  experiment.on_policy_collected_frames_per_batch:
    value: 4040
  experiment.on_policy_n_minibatch_iters:
    values: [1, 2]

  experiment.on_policy_minibatch_size:
    values: [64, 128, 256]

  # algorithm hyperparameters
  algorithm.entropy_coef:
    max: 0.05
    min: 0.0
    distribution: uniform

  # task hyperparameters
  task.goal_type:
    value: "corr"
    # distribution: categorical

  # seed:
  #   max: 84
  #   min: 0
  #   distribution: int_uniform

early_terminate:
  type: hyperband
  max_iter: 27
  s: 3

command:
  - ${env}
  - python
  - ${program}
  - ${args_no_hyphens}
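# Note (per the W&B sweep docs): in the `command` section above, ${env} expands
# to /usr/bin/env on Unix-like systems, ${program} to the `program:` path at the
# top of this file, and ${args_no_hyphens} to bare key=value pairs
# (e.g. experiment.lr=0.001), which Hydra parses as configuration overrides.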