amend

facebookresearch · Jul 27, 2024 · 5d66dbf
1 parent c21c30b
commit 5d66dbf
Show file tree

Hide file tree

Showing 7 changed files with 13 additions and 13 deletions.
diff --git a/benchmarl/algorithms/iddpg.py b/benchmarl/algorithms/iddpg.py
@@ -124,8 +124,8 @@ def _get_policy_for_loss(
                 out_keys=[(group, "action")],
                 distribution_class=TanhDelta if self.use_tanh_mapping else Delta,
                 distribution_kwargs={
-                    "low": self.action_spec[(group, "action")].space.low,
-                    "high": self.action_spec[(group, "action")].space.high,
+                    "min": self.action_spec[(group, "action")].space.low,
+                    "max": self.action_spec[(group, "action")].space.high,
                 }
                 if self.use_tanh_mapping
                 else {},

diff --git a/benchmarl/algorithms/ippo.py b/benchmarl/algorithms/ippo.py
@@ -158,8 +158,8 @@ def _get_policy_for_loss(
                 ),
                 distribution_kwargs=(
                     {
-                        "low": self.action_spec[(group, "action")].space.low,
-                        "high": self.action_spec[(group, "action")].space.high,
+                        "min": self.action_spec[(group, "action")].space.low,
+                        "max": self.action_spec[(group, "action")].space.high,
                     }
                     if self.use_tanh_normal
                     else {}

diff --git a/benchmarl/algorithms/isac.py b/benchmarl/algorithms/isac.py
@@ -203,8 +203,8 @@ def _get_policy_for_loss(
                 if not self.use_tanh_normal
                 else TanhNormal,
                 distribution_kwargs={
-                    "low": self.action_spec[(group, "action")].space.low,
-                    "high": self.action_spec[(group, "action")].space.high,
+                    "min": self.action_spec[(group, "action")].space.low,
+                    "max": self.action_spec[(group, "action")].space.high,
                 }
                 if self.use_tanh_normal
                 else {},

diff --git a/benchmarl/algorithms/maddpg.py b/benchmarl/algorithms/maddpg.py
@@ -124,8 +124,8 @@ def _get_policy_for_loss(
                 out_keys=[(group, "action")],
                 distribution_class=TanhDelta if self.use_tanh_mapping else Delta,
                 distribution_kwargs={
-                    "low": self.action_spec[(group, "action")].space.low,
-                    "high": self.action_spec[(group, "action")].space.high,
+                    "min": self.action_spec[(group, "action")].space.low,
+                    "max": self.action_spec[(group, "action")].space.high,
                 }
                 if self.use_tanh_mapping
                 else {},

diff --git a/benchmarl/algorithms/mappo.py b/benchmarl/algorithms/mappo.py
@@ -162,8 +162,8 @@ def _get_policy_for_loss(
                 ),
                 distribution_kwargs=(
                     {
-                        "low": self.action_spec[(group, "action")].space.low,
-                        "high": self.action_spec[(group, "action")].space.high,
+                        "min": self.action_spec[(group, "action")].space.low,
+                        "max": self.action_spec[(group, "action")].space.high,
                     }
                     if self.use_tanh_normal
                     else {}

diff --git a/benchmarl/algorithms/masac.py b/benchmarl/algorithms/masac.py
@@ -203,8 +203,8 @@ def _get_policy_for_loss(
                 if not self.use_tanh_normal
                 else TanhNormal,
                 distribution_kwargs={
-                    "low": self.action_spec[(group, "action")].space.low,
-                    "high": self.action_spec[(group, "action")].space.high,
+                    "min": self.action_spec[(group, "action")].space.low,
+                    "max": self.action_spec[(group, "action")].space.high,
                 }
                 if self.use_tanh_normal
                 else {},

diff --git a/benchmarl/experiment/experiment.py b/benchmarl/experiment/experiment.py
@@ -758,7 +758,7 @@ def _grad_clip(self, optimizer: torch.optim.Optimizer) -> float:
     def _evaluation_loop(self):
         evaluation_start = time.time()
         with set_exploration_type(
-            ExplorationType.DETERMINISTIC
+            ExplorationType.MODE
             if self.config.evaluation_deterministic_actions
             else ExplorationType.RANDOM
         ):