Skip to content

Commit

Permalink
drop rewards
Browse files Browse the repository at this point in the history
  • Loading branch information
jjshoots committed Sep 19, 2024
1 parent f389e91 commit 66c8753
Showing 1 changed file with 9 additions and 8 deletions.
17 changes: 9 additions & 8 deletions PyFlyt/pz_envs/fixedwing_envs/ma_fixedwing_dogfight_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,7 @@ def _compute_engagement_rewards(self) -> np.ndarray:
)
* (~self.in_range & self.chasing & self.friendly_fire_mask)
) # positive good, symmetric matrix (before masking) in range [-inf, inf]
engagement_rewards += 3.0 * delta_distance
engagement_rewards += 2.0 * delta_distance

# reward for progressing to engagement, penalty for losing angles is less
# WARNING: NaN introduced here
Expand All @@ -569,7 +569,7 @@ def _compute_engagement_rewards(self) -> np.ndarray:
* (self.in_range & self.friendly_fire_mask)
) # positive is good
delta_angles[delta_angles < 0.0] *= self.aggressiveness
engagement_rewards += 30.0 * delta_angles
engagement_rewards += 20.0 * delta_angles

# reward for engaging the enemy, penalty for being engaged
# WARNING: NaN introduced here
Expand All @@ -583,7 +583,7 @@ def _compute_engagement_rewards(self) -> np.ndarray:
)

# reward for hits, penalty for being hit
engagement_rewards += 20.0 * (
engagement_rewards += 15.0 * (
self.current_hits - (1.0 - self.aggressiveness) * self.current_hits.T
)

Expand Down Expand Up @@ -617,21 +617,21 @@ def _compute_boundary_rewards(self) -> np.ndarray:
if not self.sparse_reward:
# too close to floor
boundary_rewards -= (
5.0
3.0
* (self.attitudes[:, -1, -1] < 10.0)
* (10.0 - self.attitudes[:, -1, -1])
)

# too close to out of bounds
boundary_rewards -= (
0.025
* (self.distances_from_origin > (0.5 * self.flight_dome_size))
* (self.distances_from_origin - (0.5 * self.flight_dome_size))
0.02
* (self.distances_from_origin > (0.75 * self.flight_dome_size))
* (self.distances_from_origin - (0.75 * self.flight_dome_size))
)

# penalty for being too close to anyone, minus diagonal to ignore self
boundary_rewards -= np.sum(
5.0
3.0
* (
(self.current_distances < 5.0)
- np.eye(self.current_distances.shape[0])
Expand All @@ -640,6 +640,7 @@ def _compute_boundary_rewards(self) -> np.ndarray:
axis=-1,
)

print(boundary_rewards)
return boundary_rewards

def _compute_term_trunc_rew_info(self) -> None:
Expand Down

0 comments on commit 66c8753

Please sign in to comment.