diff --git a/.github/unittest/install_dependencies.sh b/.github/unittest/install_dependencies.sh
index e44c5bd9..a75ba321 100644
--- a/.github/unittest/install_dependencies.sh
+++ b/.github/unittest/install_dependencies.sh
@@ -1,7 +1,7 @@
 python -m pip install --upgrade pip
 
-python -m pip install flake8 pytest pytest-cov hydra-core
+python -m pip install flake8 pytest pytest-cov hydra-core tqdm
 
 if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
diff --git a/README.md b/README.md
index 476ef1c2..c5c4ae8f 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,203 @@
 # BenchMARL
+[![tests](https://github.com/facebookresearch/BenchMARL/actions/workflows/unit_tests.yml/badge.svg)](https://github.com/facebookresearch/BenchMARL/actions/workflows/unit_tests.yml)
+[![Python](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10-blue.svg)](https://www.python.org/downloads/)
+```bash
+python benchmarl/run.py algorithm=mappo task=vmas/balance
+```
+
+BenchMARL is a Multi-Agent Reinforcement Learning (MARL) training library created to enable reproducibility
+and benchmarking across different MARL algorithms and environments.
+Its mission is to present a standardized interface that allows easy integration of new algorithms and environments to
+provide a fair comparison with existing solutions.
+BenchMARL uses [TorchRL](https://github.com/pytorch/rl) as its backend, which grants it high performance
+and state-of-the-art implementations.
+BenchMARL data reporting is compatible with [marl-eval](https://github.com/instadeepai/marl-eval) for standardized and
+statistically rigorous evaluations.
+
+- [BenchMARL](#benchmarl)
+  * [How to use](#how-to-use)
+    + [Notebooks](#notebooks)
+    + [Install](#install)
+    + [Run](#run)
+  * [Concept](#concept)
+    + [Experiment](#experiment)
+    + [Benchmark](#benchmark)
+    + [Algorithms](#algorithms)
+    + [Tasks](#tasks)
+    + [Models](#models)
+  * [Reporting and plotting](#reporting-and-plotting)
+  * [Extending](#extending)
+  * [Configuring](#configuring)
+    + [Algorithm](#algorithm)
+    + [Task](#task)
+    + [Model](#model)
+  * [Features](#features)
+    + [Logging](#logging)
+    + [Checkpointing](#checkpointing)
+    + [Callbacks](#callbacks)
+
+
+## How to use
+
+### Notebooks
+
+### Install
+
+#### Install TorchRL
+
+Currently BenchMARL uses the latest version of TorchRL,
+which will be installed automatically in future versions.
+
+```bash
+pip install git+https://github.com/pytorch-labs/tensordict
+git clone https://github.com/pytorch/rl.git
+cd rl
+python setup.py develop
+cd ..
+```
+
+#### Install BenchMARL
+You can install it directly from GitHub:
+```bash
+pip install git+https://github.com/facebookresearch/BenchMARL
+```
+Or clone it locally to access the configs and scripts:
+```bash
+git clone https://github.com/facebookresearch/BenchMARL.git
+pip install -e BenchMARL
+```
+#### Install environments
+
+All environment dependencies in BenchMARL are optional and can be installed separately.
+##### VMAS
+```bash
+pip install vmas
+```
 
-## Hydra config
+##### PettingZoo
+```bash
+pip install "pettingzoo[all]"
+```
+
+##### SMACv2
+
+Follow the instructions on the environment [repository](https://github.com/oxwhirl/smacv2).
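+
+If you cloned the repository, the VMAS and PettingZoo dependencies can also be pulled in
+as package extras (a minimal sketch; the extras names `vmas` and `pettingzoo` are the ones
+declared in `setup.py`, while SMACv2 still has to be installed manually):
+```bash
+git clone https://github.com/facebookresearch/BenchMARL.git
+pip install -e "BenchMARL[vmas,pettingzoo]"
+```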
+
+### Run
+
+Experiments are launched with a [default configuration](benchmarl/conf) that
+can be overridden in many ways.
+To learn how to customize and override configurations,
+please refer to the [configuring section](#configuring).
+
+#### Command line
+
+To launch an experiment from the command line you can do:
+
+```bash
+python benchmarl/run.py algorithm=mappo task=vmas/balance
+```
+
+Thanks to [Hydra](https://hydra.cc/docs/intro/), you can run benchmarks as multi-runs like:
+```bash
+python benchmarl/run.py -m algorithm=mappo,qmix,masac task=vmas/balance,vmas/sampling seed=0,1
+```
+The default implementation for Hydra multi-runs is sequential, but [parallel execution is
+also available](https://hydra.cc/docs/plugins/joblib_launcher/).
+
+#### Script
+
+You can also load and launch your experiments from within a script:
+
+```python
+experiment = Experiment(
+    task=VmasTask.BALANCE.get_from_yaml(),
+    algorithm_config=MappoConfig.get_from_yaml(),
+    model_config=MlpConfig.get_from_yaml(),
+    critic_model_config=MlpConfig.get_from_yaml(),
+    seed=0,
+    config=ExperimentConfig.get_from_yaml(),
+)
+experiment.run()
+```
+
+See an example [here](examples/run_experiment.py).
+
+You can also run multiple experiments in a `Benchmark`:
+
+```python
+benchmark = Benchmark(
+    algorithm_configs=[
+        MappoConfig.get_from_yaml(),
+        QmixConfig.get_from_yaml(),
+        MasacConfig.get_from_yaml(),
+    ],
+    tasks=[
+        VmasTask.BALANCE.get_from_yaml(),
+        VmasTask.SAMPLING.get_from_yaml(),
+    ],
+    seeds={0, 1},
+    experiment_config=ExperimentConfig.get_from_yaml(),
+    model_config=MlpConfig.get_from_yaml(),
+    critic_model_config=MlpConfig.get_from_yaml(),
+)
+benchmark.run_sequential()
+```
+See an example [here](examples/run_benchmark.py).
+
+## Concept
+
+The goal of BenchMARL is to bring different MARL environments and algorithms
+under the same interfaces to enable fair and reproducible comparison and benchmarking.
+BenchMARL is a full-pipeline unified training library with the goal of enabling users to run
+any comparison they want across our algorithms and tasks in just one line of code.
+To achieve this, BenchMARL interconnects components from [TorchRL](https://github.com/pytorch/rl),
+which provides an efficient and reliable backend.
+
+The library has a [default configuration](benchmarl/conf) for each of its components.
+While parts of this configuration are supposed to be changed (for example experiment configurations),
+other parts (such as tasks) should not be changed to allow for reproducibility.
+To aid in this, each version of BenchMARL is paired with a default configuration.
+
+Let's now introduce each component in the library.
+
+### Experiment
+Experiment configurations are in [`benchmarl/conf/config.yaml`](benchmarl/conf/config.yaml),
+with the experiment hyperparameters configured in [`benchmarl/conf/experiment`](benchmarl/conf/experiment).
+
+An experiment is a training run in which an algorithm, a task, and a model are fixed.
+Experiments have to be configured by passing these values alongside a seed and their hyperparameters.
+The experiment [hyperparameters](benchmarl/conf/experiment/base_experiment.yaml) cover both
+on-policy and off-policy algorithms, discrete and continuous actions, and probabilistic and deterministic policies
+(as they are agnostic of the algorithm or task used).
+An experiment can be launched from the command line or from a script.
+See the [run](#run) section for more information.
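+
+A loaded experiment configuration can also be adjusted in code before it is passed to an
+`Experiment` (a minimal sketch; it assumes `loggers` and `create_json` are fields of the
+loaded experiment configuration, as the logging section below suggests):
+
+```python
+from benchmarl.experiment import ExperimentConfig
+
+# Loads from "benchmarl/conf/experiment/base_experiment.yaml"
+experiment_config = ExperimentConfig.get_from_yaml()
+
+# Assumed field names: log to wandb and dump marl-eval compatible json files
+experiment_config.loggers = ["wandb"]
+experiment_config.create_json = True
+```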
+
+### Benchmark
+
+In the library, a `benchmark` is a collection of experiments that can vary in task, algorithm, or model.
+A benchmark shares the same experiment configuration across all of its experiments.
+A benchmark can be launched from the command line or from a script.
+See the [run](#run) section for more information.
+
+### Algorithms
+TBC
+### Tasks
+TBC
+### Models
+TBC
+
+## Reporting and plotting
+TBC
+
+## Extending
+TBC
+
+
+## Configuring
 
 Running custom experiments is extremely simplified by the [Hydra](https://hydra.cc/) configurations.
@@ -74,7 +268,10 @@ Configuring a layer
 python hydra_run.py "model.layers.l1.num_cells=[3]"
 ```
 
-## Logging
+
+## Features
+
+### Logging
 
 BenchMARL is compatible with the [TorchRL loggers](https://github.com/pytorch/rl/tree/main/torchrl/record/loggers).
 A list of logger names can be provided in the [experiment config](benchmarl/conf/experiment/base_experiment.yaml).
@@ -86,3 +283,8 @@ python hydra_run.py "experiment.loggers=[wandb]"
 ```
 Additionally, you can specify a `create_json` argument which instructs the trainer to output
 a `.json` file in the format specified by [marl-eval](https://github.com/instadeepai/marl-eval).
+
+### Checkpointing
+TBC
+### Callbacks
+TBC
diff --git a/benchmarl/benchmark.py b/benchmarl/benchmark.py
index 48a49dd5..f2e432ea 100644
--- a/benchmarl/benchmark.py
+++ b/benchmarl/benchmark.py
@@ -48,4 +48,9 @@ def get_experiments(self) -> Iterator[Experiment]:
     def run_sequential(self):
         for i, experiment in enumerate(self.get_experiments()):
             print(f"\nRunning experiment {i+1}/{self.n_experiments}.\n")
-            experiment.run()
+            try:
+                experiment.run()
+            except KeyboardInterrupt as interrupt:
+                print("\n\nBenchmark was closed gracefully\n\n")
+                experiment.close()
+                raise interrupt
diff --git a/examples/run_benchmark.bash b/examples/run_benchmark.bash
new file mode 100644
index 00000000..58e8b644
--- /dev/null
+++ b/examples/run_benchmark.bash
@@ -0,0 +1 @@
+python benchmarl/run.py -m algorithm=mappo,qmix,masac task=vmas/balance,vmas/sampling seed=0,1
diff --git a/examples/simple_run.py b/examples/run_benchmark.py
similarity index 51%
rename from examples/simple_run.py
rename to examples/run_benchmark.py
index 11b7c8ea..39434dec 100644
--- a/examples/simple_run.py
+++ b/examples/run_benchmark.py
@@ -1,37 +1,34 @@
-from benchmarl.algorithms import MaddpgConfig, MappoConfig, MasacConfig, QmixConfig
+from benchmarl.algorithms import MappoConfig, MasacConfig, QmixConfig
 from benchmarl.benchmark import Benchmark
 from benchmarl.environments import VmasTask
 from benchmarl.experiment import ExperimentConfig
-from benchmarl.models.common import SequenceModelConfig
 from benchmarl.models.mlp import MlpConfig
-from torch import nn
 
 if __name__ == "__main__":
 
+    # Loads from "benchmarl/conf/experiment/base_experiment.yaml"
     experiment_config = ExperimentConfig.get_from_yaml()
-    tasks = [VmasTask.BALANCE.get_from_yaml()]
+
+    # Loads from "benchmarl/conf/task"
+    tasks = [VmasTask.BALANCE.get_from_yaml(), VmasTask.SAMPLING.get_from_yaml()]
+
+    # Loads from "benchmarl/conf/algorithm"
     algorithm_configs = [
         MappoConfig.get_from_yaml(),
-        MaddpgConfig.get_from_yaml(),
         QmixConfig.get_from_yaml(),
         MasacConfig.get_from_yaml(),
     ]
-    seeds = {0}
 
-    # Model still need to be refactored for hydra loading
-    model_config = SequenceModelConfig(
-        model_configs=[
-            MlpConfig.get_from_yaml(),
-            MlpConfig(num_cells=[256], layer_class=nn.Linear, activation_class=nn.Tanh),
-        ],
-        intermediate_sizes=[128],
-    )
+    # Loads from "benchmarl/conf/model/layers"
+    model_config = MlpConfig.get_from_yaml()
+    critic_model_config = MlpConfig.get_from_yaml()
 
     benchmark = Benchmark(
         algorithm_configs=algorithm_configs,
         tasks=tasks,
-        seeds=seeds,
+        seeds={0, 1},
         experiment_config=experiment_config,
         model_config=model_config,
+        critic_model_config=critic_model_config,
     )
     benchmark.run_sequential()
diff --git a/examples/run_experiment.bash b/examples/run_experiment.bash
new file mode 100644
index 00000000..0863f39c
--- /dev/null
+++ b/examples/run_experiment.bash
@@ -0,0 +1 @@
+python benchmarl/run.py algorithm=mappo task=vmas/balance
diff --git a/examples/run_experiment.py b/examples/run_experiment.py
new file mode 100644
index 00000000..72d82502
--- /dev/null
+++ b/examples/run_experiment.py
@@ -0,0 +1,32 @@
+from benchmarl.algorithms import MappoConfig
+
+from benchmarl.environments import VmasTask
+
+from benchmarl.experiment import Experiment, ExperimentConfig
+
+from benchmarl.models.mlp import MlpConfig
+
+if __name__ == "__main__":
+
+    # Loads from "benchmarl/conf/experiment/base_experiment.yaml"
+    experiment_config = ExperimentConfig.get_from_yaml()
+
+    # Loads from "benchmarl/conf/task/vmas/balance.yaml"
+    task = VmasTask.BALANCE.get_from_yaml()
+
+    # Loads from "benchmarl/conf/algorithm/mappo.yaml"
+    algorithm_config = MappoConfig.get_from_yaml()
+
+    # Loads from "benchmarl/conf/model/layers/mlp.yaml"
+    model_config = MlpConfig.get_from_yaml()
+    critic_model_config = MlpConfig.get_from_yaml()
+
+    experiment = Experiment(
+        task=task,
+        algorithm_config=algorithm_config,
+        model_config=model_config,
+        critic_model_config=critic_model_config,
+        seed=0,
+        config=experiment_config,
+    )
+    experiment.run()
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 78620c47..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-tqdm
diff --git a/setup.py b/setup.py
index 87ebb340..c16873a3 100644
--- a/setup.py
+++ b/setup.py
@@ -8,9 +8,9 @@
     author="Matteo Bettini",
     author_email="mb2389@cl.cam.ac.uk",
     packages=find_packages(),
-    install_requires=["torchrl", "tqdm", "hydra-core"],
+    install_requires=["tqdm", "hydra-core"],
     extras_require={
-        "tasks": ["vmas>=1.2.10", "pettingzoo[all]>=1.24.1"],
+        "vmas": ["vmas>=1.2.10"],
+        "pettingzoo": ["pettingzoo[all]>=1.24.1"],
     },
-    include_package_data=True,
 )
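The new example entry points can be exercised directly from the repository root
(assuming BenchMARL and VMAS are installed as described in the README):

```bash
python examples/run_experiment.py
bash examples/run_benchmark.bash
```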