From 5d66dbf0c57e83e7eb8c89e957193b9f3b6b2a7c Mon Sep 17 00:00:00 2001
From: Matteo Bettini
Date: Sat, 27 Jul 2024 19:52:54 +0200
Subject: [PATCH] amend

---
 benchmarl/algorithms/iddpg.py      | 4 ++--
 benchmarl/algorithms/ippo.py       | 4 ++--
 benchmarl/algorithms/isac.py       | 4 ++--
 benchmarl/algorithms/maddpg.py     | 4 ++--
 benchmarl/algorithms/mappo.py      | 4 ++--
 benchmarl/algorithms/masac.py      | 4 ++--
 benchmarl/experiment/experiment.py | 2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/benchmarl/algorithms/iddpg.py b/benchmarl/algorithms/iddpg.py
index 22655af7..a114123b 100644
--- a/benchmarl/algorithms/iddpg.py
+++ b/benchmarl/algorithms/iddpg.py
@@ -124,8 +124,8 @@ def _get_policy_for_loss(
             out_keys=[(group, "action")],
             distribution_class=TanhDelta if self.use_tanh_mapping else Delta,
             distribution_kwargs={
-                "low": self.action_spec[(group, "action")].space.low,
-                "high": self.action_spec[(group, "action")].space.high,
+                "min": self.action_spec[(group, "action")].space.low,
+                "max": self.action_spec[(group, "action")].space.high,
             }
             if self.use_tanh_mapping
             else {},
diff --git a/benchmarl/algorithms/ippo.py b/benchmarl/algorithms/ippo.py
index 012c6880..aac2cd88 100644
--- a/benchmarl/algorithms/ippo.py
+++ b/benchmarl/algorithms/ippo.py
@@ -158,8 +158,8 @@ def _get_policy_for_loss(
             ),
             distribution_kwargs=(
                 {
-                    "low": self.action_spec[(group, "action")].space.low,
-                    "high": self.action_spec[(group, "action")].space.high,
+                    "min": self.action_spec[(group, "action")].space.low,
+                    "max": self.action_spec[(group, "action")].space.high,
                 }
                 if self.use_tanh_normal
                 else {}
diff --git a/benchmarl/algorithms/isac.py b/benchmarl/algorithms/isac.py
index e7e3dd6a..74c29ea7 100644
--- a/benchmarl/algorithms/isac.py
+++ b/benchmarl/algorithms/isac.py
@@ -203,8 +203,8 @@ def _get_policy_for_loss(
             if not self.use_tanh_normal
             else TanhNormal,
             distribution_kwargs={
-                "low": self.action_spec[(group, "action")].space.low,
-                "high": self.action_spec[(group, "action")].space.high,
+                "min": self.action_spec[(group, "action")].space.low,
+                "max": self.action_spec[(group, "action")].space.high,
             }
             if self.use_tanh_normal
             else {},
diff --git a/benchmarl/algorithms/maddpg.py b/benchmarl/algorithms/maddpg.py
index 673bb792..1590f81f 100644
--- a/benchmarl/algorithms/maddpg.py
+++ b/benchmarl/algorithms/maddpg.py
@@ -124,8 +124,8 @@ def _get_policy_for_loss(
             out_keys=[(group, "action")],
             distribution_class=TanhDelta if self.use_tanh_mapping else Delta,
             distribution_kwargs={
-                "low": self.action_spec[(group, "action")].space.low,
-                "high": self.action_spec[(group, "action")].space.high,
+                "min": self.action_spec[(group, "action")].space.low,
+                "max": self.action_spec[(group, "action")].space.high,
             }
             if self.use_tanh_mapping
             else {},
diff --git a/benchmarl/algorithms/mappo.py b/benchmarl/algorithms/mappo.py
index 3ddd8d53..891200ef 100644
--- a/benchmarl/algorithms/mappo.py
+++ b/benchmarl/algorithms/mappo.py
@@ -162,8 +162,8 @@ def _get_policy_for_loss(
             ),
             distribution_kwargs=(
                 {
-                    "low": self.action_spec[(group, "action")].space.low,
-                    "high": self.action_spec[(group, "action")].space.high,
+                    "min": self.action_spec[(group, "action")].space.low,
+                    "max": self.action_spec[(group, "action")].space.high,
                 }
                 if self.use_tanh_normal
                 else {}
diff --git a/benchmarl/algorithms/masac.py b/benchmarl/algorithms/masac.py
index feee4398..358010ef 100644
--- a/benchmarl/algorithms/masac.py
+++ b/benchmarl/algorithms/masac.py
@@ -203,8 +203,8 @@ def _get_policy_for_loss(
             if not self.use_tanh_normal
             else TanhNormal,
             distribution_kwargs={
-                "low": self.action_spec[(group, "action")].space.low,
-                "high": self.action_spec[(group, "action")].space.high,
+                "min": self.action_spec[(group, "action")].space.low,
+                "max": self.action_spec[(group, "action")].space.high,
             }
             if self.use_tanh_normal
             else {},
diff --git a/benchmarl/experiment/experiment.py b/benchmarl/experiment/experiment.py
index b8a294c8..b09a7abc 100644
--- a/benchmarl/experiment/experiment.py
+++ b/benchmarl/experiment/experiment.py
@@ -758,7 +758,7 @@ def _grad_clip(self, optimizer: torch.optim.Optimizer) -> float:
     def _evaluation_loop(self):
         evaluation_start = time.time()
         with set_exploration_type(
-            ExplorationType.DETERMINISTIC
+            ExplorationType.MODE
             if self.config.evaluation_deterministic_actions
             else ExplorationType.RANDOM
         ):
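Not part of the patch itself: every algorithm hunk above makes the same change, switching the TanhNormal/TanhDelta bound keywords from "low"/"high" to "min"/"max" (the names used by earlier torchrl releases), and the experiment.py hunk likewise swaps ExplorationType.DETERMINISTIC for ExplorationType.MODE. The snippet below is a minimal sketch of that keyword-argument difference only; the tensors, shapes, and bounds are made up for illustration, and it assumes a torchrl version that still accepts min/max.

# Illustrative only, not from the patch. Assumes an older torchrl where
# TanhNormal takes "min"/"max" bounds; newer releases renamed them "low"/"high".
import torch
from torchrl.modules import TanhNormal

loc = torch.zeros(3)    # hypothetical mean of the pre-tanh Normal
scale = torch.ones(3)   # hypothetical std of the pre-tanh Normal
low = -torch.ones(3)    # hypothetical action lower bound
high = torch.ones(3)    # hypothetical action upper bound

# Older keyword names, the ones this patch switches to:
dist = TanhNormal(loc, scale, min=low, max=high)

# Newer keyword names, the ones this patch removes (use on recent torchrl):
# dist = TanhNormal(loc, scale, low=low, high=high)

print(dist.sample())  # a sample squashed into [low, high]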