[BugFix] Fix collect with grad (#114)

* amend * amend * amend
facebookresearch · Jul 27, 2024 · 6edcdc1 · 6edcdc1
1 parent 4d7be55
commit 6edcdc1
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 2 deletions.
diff --git a/benchmarl/conf/experiment/base_experiment.yaml b/benchmarl/conf/experiment/base_experiment.yaml
@@ -82,7 +82,7 @@ render: True
 evaluation_interval: 120_000
 # Number of episodes that evaluation is run on
 evaluation_episodes: 10
-# If True, when stochastic policies are evaluated, their mode is taken, otherwise, if False, they are sampled
+# If True, when stochastic policies are evaluated, their deterministic value is taken, otherwise, if False, they are sampled
 evaluation_deterministic_actions: True
 
 # List of loggers to use, options are: wandb, csv, tensorboard, mlflow

diff --git a/benchmarl/experiment/experiment.py b/benchmarl/experiment/experiment.py
@@ -582,7 +582,12 @@ def _collection_loop(self):
                         auto_reset=False,
                         tensordict=reset_batch,
                     )
-                    reset_batch = step_mdp(batch[..., -1])
+                    reset_batch = step_mdp(
+                        batch[..., -1],
+                        reward_keys=self.rollout_env.reward_keys,
+                        action_keys=self.rollout_env.action_keys,
+                        done_keys=self.rollout_env.done_keys,
+                    )
 
             # Logging collection
             collection_time = time.time() - iteration_start

diff --git a/test/test_vmas.py b/test/test_vmas.py
@@ -74,6 +74,24 @@ def test_all_tasks(
         )
         experiment.run()
 
+    def test_collect_with_grad(
+        self,
+        experiment_config,
+        mlp_sequence_config,
+        algo_config: AlgorithmConfig = IppoConfig,
+        task: Task = VmasTask.BALANCE,
+    ):
+        task = task.get_from_yaml()
+        experiment_config.collect_with_grad = True
+        experiment = Experiment(
+            algorithm_config=algo_config.get_from_yaml(),
+            model_config=mlp_sequence_config,
+            seed=0,
+            config=experiment_config,
+            task=task,
+        )
+        experiment.run()
+
     @pytest.mark.parametrize(
         "algo_config", [IppoConfig, QmixConfig, IsacConfig, IddpgConfig]
     )