diff --git a/PyFlyt/pz_envs/fixedwing_envs/ma_fixedwing_dogfight_env.py b/PyFlyt/pz_envs/fixedwing_envs/ma_fixedwing_dogfight_env.py
index 9016f660..644b3415 100644
--- a/PyFlyt/pz_envs/fixedwing_envs/ma_fixedwing_dogfight_env.py
+++ b/PyFlyt/pz_envs/fixedwing_envs/ma_fixedwing_dogfight_env.py
@@ -559,7 +559,7 @@ def _compute_engagement_rewards(self) -> np.ndarray:
             )
             * (~self.in_range & self.chasing & self.friendly_fire_mask)
         )  # positive good, symmetric matrix (before masking) in range [-inf, inf]
-        engagement_rewards += 3.0 * delta_distance
+        engagement_rewards += 2.0 * delta_distance
 
         # reward for progressing to engagement, penalty for losing angles is less
         # WARNING: NaN introduced here
@@ -569,7 +569,7 @@ def _compute_engagement_rewards(self) -> np.ndarray:
             * (self.in_range & self.friendly_fire_mask)
         )  # positive is good
         delta_angles[delta_angles < 0.0] *= self.aggressiveness
-        engagement_rewards += 30.0 * delta_angles
+        engagement_rewards += 20.0 * delta_angles
 
         # reward for engaging the enemy, penalty for being engaged
         # WARNING: NaN introduced here
@@ -583,7 +583,7 @@ def _compute_engagement_rewards(self) -> np.ndarray:
         )
 
         # reward for hits, penalty for being hit
-        engagement_rewards += 20.0 * (
+        engagement_rewards += 15.0 * (
             self.current_hits - (1.0 - self.aggressiveness) * self.current_hits.T
         )
 
@@ -617,21 +617,21 @@ def _compute_boundary_rewards(self) -> np.ndarray:
         if not self.sparse_reward:
             # too close to floor
             boundary_rewards -= (
-                5.0
+                3.0
                 * (self.attitudes[:, -1, -1] < 10.0)
                 * (10.0 - self.attitudes[:, -1, -1])
             )
 
             # too close to out of bounds
             boundary_rewards -= (
-                0.025
-                * (self.distances_from_origin > (0.5 * self.flight_dome_size))
-                * (self.distances_from_origin - (0.5 * self.flight_dome_size))
+                0.02
+                * (self.distances_from_origin > (0.75 * self.flight_dome_size))
+                * (self.distances_from_origin - (0.75 * self.flight_dome_size))
             )
 
             # reward for being too close to anyone, minus diagonal to ignore self
             boundary_rewards -= np.sum(
-                5.0
+                3.0
                 * (
                     (self.current_distances < 5.0)
                     - np.eye(self.current_distances.shape[0])
@@ -640,6 +640,7 @@ def _compute_boundary_rewards(self) -> np.ndarray:
                 axis=-1,
             )
 
+        print(boundary_rewards)
         return boundary_rewards
 
     def _compute_term_trunc_rew_info(self) -> None: