-
Notifications
You must be signed in to change notification settings - Fork 192
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ValueError: The two structures don't have the same nested structure. when python experiments/ppo_4x4grid.py #179
Comments
@lie12huo have you figured a way to solve it, i am facing the same issue |
@SecondTheFirst This seems to be a version compatibility issue that I have never solved. |
facing the same issue, running it in a docker container |
Facing the same issue. Any ideas? |
Hey, I eventually found a fix somewhere in a PR to rllib's PettingZoo wrappers and I am attaching the file that made it work for us. Simply copy this file somewhere and import it. It's really just a hot-fix, but I don't remember exactly where I found the PR, and it might have been merged into rllib's main branch by now, so the first thing I would try is to upgrade rllib to the newest release. from typing import Optional
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils.gym import convert_old_gym_space_to_gymnasium_space
from ray.rllib.utils.typing import MultiAgentDict
# import any
from gymnasium.spaces import Tuple
@PublicAPI
class PettingZooEnv(MultiAgentEnv):
    """An interface to the PettingZoo MARL environment library.

    See: https://github.com/Farama-Foundation/PettingZoo

    Inherits from MultiAgentEnv and exposes a given AEC
    (actor-environment-cycle) game from the PettingZoo project via the
    MultiAgentEnv public API.

    Note that the wrapper has some important limitations:

    1. All agents have the same action_spaces and observation_spaces.
       Note: If, within your aec game, agents do not have homogeneous action /
       observation spaces, apply SuperSuit wrappers
       to apply padding functionality: https://github.com/Farama-Foundation/
       SuperSuit#built-in-multi-agent-only-functions
    2. Environments are positive sum games (-> Agents are expected to cooperate
       to maximize reward). This isn't a hard restriction, it just that
       standard algorithms aren't expected to work well in highly competitive
       games.

    Examples:
        >>> from pettingzoo.butterfly import prison_v3
        >>> env = PettingZooEnv(prison_v3.env())
        >>> obs, infos = env.reset()
        >>> # `obs` only contains the observation for the agent whose turn
        >>> # it is to step; `step()` likewise returns per-agent dicts keyed
        >>> # by the next agent to act, plus the "__all__" done flags.
    """

    def __init__(self, env):
        """Wrap an AEC PettingZoo environment.

        Args:
            env: A PettingZoo AEC environment instance. All agents must
                share identical observation and action spaces.

        Raises:
            ValueError: If any agent's observation or action space differs
                from the first agent's.
        """
        super().__init__()
        self.env = env
        # A reset is required before `env.agents` and the per-agent spaces
        # can be queried below.
        env.reset()

        # Since all agents have the same spaces, do not provide full
        # observation- and action-spaces as Dicts, mapping agent IDs to the
        # individual agents' spaces. Instead,
        # `self.[action|observation]_space` are the single agent spaces.
        self._obs_space_in_preferred_format = False
        self._action_space_in_preferred_format = False

        # Collect the individual agents' spaces (they should all be the same):
        first_obs_space = self.env.observation_space(self.env.agents[0])
        first_action_space = self.env.action_space(self.env.agents[0])

        for agent in self.env.agents:
            if self.env.observation_space(agent) != first_obs_space:
                raise ValueError(
                    "Observation spaces for all agents must be identical. Perhaps "
                    "SuperSuit's pad_observations wrapper can help (usage: "
                    "`supersuit.aec_wrappers.pad_observations(env)`"
                )
            if self.env.action_space(agent) != first_action_space:
                raise ValueError(
                    "Action spaces for all agents must be identical. Perhaps "
                    "SuperSuit's pad_action_space wrapper can help (usage: "
                    "`supersuit.aec_wrappers.pad_action_space(env)`"
                )

        # Convert from gym to gymnasium, if necessary.
        self.observation_space = convert_old_gym_space_to_gymnasium_space(
            first_obs_space
        )
        self.action_space = convert_old_gym_space_to_gymnasium_space(
            first_action_space
        )

        self._agent_ids = self.env.agents

    def observation_space_sample(self, agent_ids: list = None) -> MultiAgentDict:
        """Return a dict mapping each requested agent ID to a sampled obs."""
        if agent_ids is None:
            agent_ids = self._agent_ids
        return {id: self.observation_space.sample() for id in agent_ids}

    def action_space_sample(self, agent_ids: list = None) -> MultiAgentDict:
        """Return a dict mapping each requested agent ID to a sampled action."""
        if agent_ids is None:
            agent_ids = self._agent_ids
        return {id: self.action_space.sample() for id in agent_ids}

    def action_space_contains(self, x: MultiAgentDict) -> bool:
        """Check that `x` is a dict and every value is a valid action."""
        if not isinstance(x, dict):
            return False
        return all(self.action_space.contains(val) for val in x.values())

    def observation_space_contains(self, x: MultiAgentDict) -> bool:
        """Check that `x` is a dict and every value is a valid observation."""
        if not isinstance(x, dict):
            return False
        return all(self.observation_space.contains(val) for val in x.values())

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        """Reset the env; return obs for the first agent to act, and infos."""
        info = self.env.reset(seed=seed, options=options)
        return (
            {self.env.agent_selection: self.env.observe(self.env.agent_selection)},
            info or {},
        )

    def step(self, action):
        """Step the currently selected agent.

        Args:
            action: Dict mapping the current `agent_selection` to its action.

        Returns:
            Tuple of (obs, rewards, terminateds, truncateds, infos) dicts,
            each keyed by the next agent to act; terminateds/truncateds also
            carry the "__all__" aggregate flag.
        """
        self.env.step(action[self.env.agent_selection])

        obs_d = {}
        rew_d = {}
        terminated_d = {}
        truncated_d = {}
        info_d = {}
        # Skip over agents that are already done (AEC requires stepping them
        # with `None`) until we find one that still needs an action, or until
        # no agents remain.
        while self.env.agents:
            obs, rew, terminated, truncated, info = self.env.last()
            agent_id = self.env.agent_selection
            obs_d[agent_id] = obs
            rew_d[agent_id] = rew
            terminated_d[agent_id] = terminated
            truncated_d[agent_id] = truncated
            info_d[agent_id] = info
            if (
                self.env.terminations[self.env.agent_selection]
                or self.env.truncations[self.env.agent_selection]
            ):
                self.env.step(None)
            else:
                break

        all_gone = not self.env.agents
        terminated_d["__all__"] = all_gone and all(terminated_d.values())
        truncated_d["__all__"] = all_gone and all(truncated_d.values())

        return obs_d, rew_d, terminated_d, truncated_d, info_d

    def close(self):
        """Close the underlying PettingZoo env."""
        self.env.close()

    def render(self):
        """Render the underlying env.

        NOTE(review): relies on `self.render_mode` being set by the base
        class or the caller — it is never assigned in this wrapper; confirm.
        """
        return self.env.render(self.render_mode)

    @property
    def get_sub_environments(self):
        """Return the unwrapped underlying environment."""
        return self.env.unwrapped
@PublicAPI
class ParallelPettingZooEnv(MultiAgentEnv):
    """An interface to a PettingZoo Parallel-API environment.

    All agents step simultaneously; observation/reward/done/info dicts are
    passed through from the underlying parallel env, with the "__all__"
    aggregate flags added to the done dicts.
    """

    def __init__(self, env):
        """Wrap a PettingZoo Parallel environment.

        Args:
            env: A PettingZoo parallel environment instance. All agents must
                share identical observation and action spaces.

        Raises:
            AssertionError: If any agent's observation or action space
                differs from the first agent's.
        """
        super().__init__()
        self.par_env = env
        # A reset is required before `par_env.agents` and the per-agent
        # spaces can be queried below.
        self.par_env.reset()

        # Since all agents have the same spaces, do not provide full
        # observation- and action-spaces as Dicts, mapping agent IDs to the
        # individual agents' spaces. Instead,
        # `self.[action|observation]_space` are the single agent spaces.
        self._obs_space_in_preferred_format = False
        self._action_space_in_preferred_format = False

        # Get first observation space, assuming all agents have equal space.
        self.observation_space = self.par_env.observation_space(
            self.par_env.agents[0]
        )
        # Get first action space, assuming all agents have equal space.
        self.action_space = self.par_env.action_space(self.par_env.agents[0])

        assert all(
            self.par_env.observation_space(agent) == self.observation_space
            for agent in self.par_env.agents
        ), (
            "Observation spaces for all agents must be identical. Perhaps "
            "SuperSuit's pad_observations wrapper can help (usage: "
            "`supersuit.aec_wrappers.pad_observations(env)`"
        )

        # NOTE: A redundant duplicate of the check below (which merely
        # printed the message before the assert raised anyway) was removed.
        assert all(
            self.par_env.action_space(agent) == self.action_space
            for agent in self.par_env.agents
        ), (
            "Action spaces for all agents must be identical. Perhaps "
            "SuperSuit's pad_action_space wrapper can help (usage: "
            "`supersuit.aec_wrappers.pad_action_space(env)`"
        )

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        """Reset the env; return the per-agent obs dict and infos dict."""
        obs, info = self.par_env.reset(seed=seed, options=options)
        return obs, info or {}

    def step(self, action_dict):
        """Step all agents simultaneously.

        Args:
            action_dict: Dict mapping agent IDs to their actions.

        Returns:
            Tuple of (obs, rewards, terminateds, truncateds, infos) dicts;
            terminateds/truncateds carry the "__all__" aggregate flag.
        """
        obss, rews, terminateds, truncateds, infos = self.par_env.step(action_dict)
        terminateds["__all__"] = all(terminateds.values())
        truncateds["__all__"] = all(truncateds.values())
        return obss, rews, terminateds, truncateds, infos

    def close(self):
        """Close the underlying PettingZoo env."""
        self.par_env.close()

    def render(self):
        """Render the underlying env.

        NOTE(review): relies on `self.render_mode` being set by the base
        class or the caller — it is never assigned in this wrapper; confirm.
        """
        return self.par_env.render(self.render_mode)

    @property
    def get_sub_environments(self):
        """Return the unwrapped underlying environment."""
        return self.par_env.unwrapped
It works! Thanks a lot!
When I executed the command “python experiments/ppo_4x4grid.py” for training, the following error occurred:
The text was updated successfully, but these errors were encountered: