
Restructuring MADRaS into a Multi-agent MDP #39

Open
wants to merge 54 commits into base: Version-2.0
54 commits
e26307e
exception handling in step is problematic
Santara Dec 5, 2019
8f6a34f
Added Parked Agents, and reward function and observation for single a…
Santara Dec 6, 2019
8df9925
minor merge
Santara Dec 8, 2019
8550f4b
Multi-agent reset latency issue fixed
Santara Dec 12, 2019
4e6f535
Added respond func to eliminate 10s of timeout error
manish-pra Dec 12, 2019
95e6a27
Merge pull request #6 from manish-pra/devel
Santara Dec 12, 2019
686e3bf
added multiprocessing event to parallely close all process(agent) tog…
Dec 13, 2019
8fb3fdd
updated gitignore
rudrasohan Dec 15, 2019
02431ba
updated changes from main repo
rudrasohan Dec 15, 2019
b4655a5
added changes suggested by @manish-pra
rudrasohan Dec 15, 2019
6fa2e00
smooth V2
rudrasohan Dec 15, 2019
d86ee0c
reorganization mirroring main-repo
rudrasohan Dec 15, 2019
5bef0d0
Time for 0.2
rudrasohan Dec 15, 2019
d2ca923
final fixes
rudrasohan Dec 15, 2019
67745c5
Merging #8
Santara Dec 15, 2019
28674d6
Resolved merge conflicts
Santara Dec 15, 2019
f2f1de5
Merging #7
Santara Dec 15, 2019
04f5550
Cleaning up the code...
Santara Dec 15, 2019
b9fca83
Merge branch 'manish-pra-devel' into devel
Santara Dec 15, 2019
2700353
Merge branch 'devel' of https://github.com/Santara/MADRaS into devel
Santara Dec 15, 2019
9e28bf1
Merging #8
Santara Dec 15, 2019
047b5d0
created action_dict
rudrasohan Dec 20, 2019
253b292
recommended sim changes
rudrasohan Dec 20, 2019
d0550be
added multi-agent train script
rudrasohan Dec 20, 2019
b97a446
addressed @Santara's comments
rudrasohan Dec 21, 2019
11383d5
used inheritance for env wrapper
rudrasohan Dec 21, 2019
f70985c
added extra space
rudrasohan Dec 21, 2019
71b6fe0
done feature req
rudrasohan Dec 21, 2019
e9b76e2
reformatted dones
rudrasohan Dec 21, 2019
5a9bbe5
Merge pull request #9 from rudrasohan/mult_train
Santara Dec 22, 2019
40f8a93
added comm capabilities (untested)
rudrasohan Dec 22, 2019
f6c730a
removed typos
rudrasohan Dec 22, 2019
53b52e4
syntax correctness
rudrasohan Dec 22, 2019
2a58d11
dimensional correctness
rudrasohan Dec 22, 2019
cd019da
removed buffer multiprocessing errors
rudrasohan Dec 22, 2019
7b9ad3a
updated changes for multi work
rudrasohan Feb 29, 2020
0cdf29d
changed conflicting names
rudrasohan Feb 29, 2020
640ca51
updated madras types nomenclature
rudrasohan Mar 14, 2020
172025b
[UNTESTED] updated comm module with custom network architecture
rudrasohan Mar 14, 2020
e86d664
obs_dim mismatch fixed
rudrasohan Mar 17, 2020
32c56d9
corrected obs formatting
rudrasohan Mar 17, 2020
d879291
buffer fix and variable error fix
rudrasohan Mar 17, 2020
7d4e9d7
IV COMM working
rudrasohan Mar 18, 2020
41fcccc
removed deprecated examples of V1
rudrasohan Mar 23, 2020
7473967
addressed @Santara's comments
rudrasohan Apr 3, 2020
b022d8c
addressed new comments
rudrasohan Apr 5, 2020
184c660
removed stray comments
rudrasohan Apr 5, 2020
9ca2145
removed configs from init
rudrasohan Apr 19, 2020
e0e6796
updated mult train restore path
rudrasohan Apr 19, 2020
3c73f2d
added print changes
rudrasohan Apr 19, 2020
e9c6a9b
Merge pull request #10 from rudrasohan/mult_train
Santara Apr 19, 2020
45c764a
init work on merge
rudrasohan Oct 16, 2020
abfb42d
random working without traffic
rudrasohan Oct 16, 2020
3f03013
initial traffic random working
rudrasohan Oct 16, 2020
2 changes: 1 addition & 1 deletion .gitignore
@@ -2,7 +2,7 @@
__pycache__/
*.py[cod]
*$py.class

*.vscode/
# C extensions
*.so

12 changes: 6 additions & 6 deletions MADRaS/__init__.py
@@ -1,9 +1,9 @@
"""Env Registration."""
from gym.envs.registration import register

register(
    id='Madras-v0',
    entry_point='MADRaS.envs:MadrasEnv',
    max_episode_steps=10000,
    reward_threshold=25.0,
)
# register(
#     id='Madras-v0',
#     entry_point='MADRaS.envs:MadrasEnv',
#     max_episode_steps=10000,
#     reward_threshold=25.0,
# )
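With the registration commented out, importing MADRaS no longer registers Madras-v0 with Gym, so the environment must now be constructed directly. A minimal sketch of what this changes for callers (assuming the standard Gym registry API):

import gym

# Before this PR: importing MADRaS ran register(), enabling the registry path
import MADRaS  # noqa: F401
env = gym.make('Madras-v0')

# After this PR: construct the v2 env directly
from MADRaS.envs.gym_madras_v2 import MadrasEnv
env = MadrasEnv()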
46 changes: 46 additions & 0 deletions MADRaS/agents/generic/pid_test.py
@@ -0,0 +1,46 @@
import numpy as np
import gym
from MADRaS.envs.gym_madras_v2 import MadrasEnv
import os
import sys
import logging

logging.basicConfig(level=logging.DEBUG)

def test_madras_pid(vel, file_name):
    env = MadrasEnv()
    for key, val in env.agents.items():
        print("Observation Space ", val.observation_space)
        print("Obs_dim ", val.obs_dim)
    print("Testing reset...")
    obs = env.reset()
    vel = float(vel)
    a = [0.0, vel]
    b = [0.1, 0.00]
    c = [0.2, -0.2]
    print("Initial observation: {}."
          " Verify if the number of dimensions is right.".format(obs))
    for key, value in obs.items():
        print("{}: {}".format(key, len(value)))
    print("Testing step...")
    running_rew = 0
    speeds = []
    for t in range(300):
        obs, r, done, _ = env.step({"MadrasAgent_0": a})
        # print("{}".format(obs))
        # a = [0.0, 0.0]
        running_rew += r["MadrasAgent_0"]
        # print("{}: reward={}, done={}".format(t, running_rew, done))
        # logger.info("HELLO")
        speeds.append(obs["MadrasAgent_0"][21])
        if done['__all__']:
            env.reset()
    print(speeds)
    np.save(file_name, np.array(speeds))
    os.system("pkill torcs")


if __name__ == '__main__':
    # test_madras_vanilla()
    test_madras_pid(sys.argv[1], sys.argv[2])
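pid_test.py reads the PID target speed and an output file for the speed trace from the command line, so a typical invocation would be (arguments illustrative):

python MADRaS/agents/generic/pid_test.py 50.0 speeds.npy

The two-dimensional action as a [lane_pos, target_speed] setpoint pair and the speed channel at obs index 21 are assumptions read off this diff; neither is documented here.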
File renamed without changes.
69 changes: 69 additions & 0 deletions MADRaS/agents/generic/test_environment_v2.py
@@ -0,0 +1,69 @@
import numpy as np
import gym
from MADRaS.envs.gym_madras_v2 import MadrasEnv
import os
import sys
import logging

logging.basicConfig(filename='Telemetry.log', level=logging.DEBUG)
# logger = logging.getLogger(__name__)
# logger.setLevel(logging.DEBUG)
# fh = logging.FileHandler('Telemetry.log')
# fh.setLevel(logging.DEBUG)
# logger.addHandler(fh)


def test_madras_vanilla():
    env = MadrasEnv()
    print("Testing reset...")
    obs = env.reset()
    print("Initial observation: {}."
          " Verify if the number of dimensions {} is right.".format(obs, len(obs)))
    print("Testing step...")
    a = [0.0, 1.0, -1.0]
    a = [0.0, 0.2, 0.0]
    b = [0.1, 0.3, 0.0]
    c = [0.2, 0.4, 0.0]
    for t in range(4000):
        obs, r, done, _ = env.step({"MadrasAgent_0": a, "MadrasAgent_1": b, "MadrasAgent_2": c})
        if (t+1) % 150 == 0:
            a = [0.0, -1.0, 1.0]
        print("{}: reward={}, done={}".format(t, r, done))
        dones = [x for x in done.values()]
        if np.any(dones):
            env.reset()
    os.system("pkill torcs")


def test_madras_pid():
    env = MadrasEnv()
    for key, val in env.agents.items():
        print("Observation Space ", val.observation_space)
        print("Obs_dim ", val.obs_dim)
    print("Testing reset...")
    obs = env.reset()
    a = [0.0, 0.2]
    b = [0.1, 0.00]
    c = [0.2, -0.2]
    print("Initial observation: {}."
          " Verify if the number of dimensions is right.".format(obs))
    for key, value in obs.items():
        print("{}: {}".format(key, len(value)))
    print("Testing step...")
    running_rew = 0
    for t in range(4000):
        obs, r, done, _ = env.step({"MadrasAgent_0": a, "MadrasAgent_1": b, "MadrasAgent_2": c})
        # print("{}".format(obs))
        # if (t+1) % 15 == 0:
        #     a = [0.0, 0.0]
        running_rew += r["MadrasAgent_0"]
        # print("{}: reward={}, done={}".format(t, running_rew, done))
        # logger.info("HELLO")
        if done['__all__']:
            env.reset()
    os.system("pkill torcs")


if __name__ == '__main__':
    test_madras_vanilla()
    # test_madras_pid()
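Both tests exercise the multi-agent dict convention of the restructured env: step() takes a dict of per-agent actions and returns per-agent observation, reward, and done dicts, with done carrying the extra '__all__' key. A minimal interaction loop under that convention (a sketch; the three-element actions mirror the vanilla test above and are assumed to be steer/accel/brake):

from MADRaS.envs.gym_madras_v2 import MadrasEnv

env = MadrasEnv()
obs = env.reset()  # {"MadrasAgent_0": [...], "MadrasAgent_1": [...], ...}
done = {"__all__": False}
while not done["__all__"]:
    # one (possibly distinct) action per agent id present in obs
    actions = {agent_id: [0.0, 0.2, 0.0] for agent_id in obs}
    obs, rew, done, info = env.step(actions)
    # rew and done are keyed per agent; done["__all__"] flags episode end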
File renamed without changes.
File renamed without changes.
File renamed without changes.
101 changes: 101 additions & 0 deletions MADRaS/agents/rllib/train_rllib_multi_agent.py
@@ -0,0 +1,101 @@
import ray
import gym
import argparse
from ray.rllib.agents.ppo.ppo import PPOTrainer
from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
from ray.tune.logger import pretty_print
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from MADRaS.envs.gym_madras_v2 import MadrasEnv
import logging
import numpy as np

logging.basicConfig(filename='Telemetry.log', level=logging.DEBUG)


class MadrasRllib(MultiAgentEnv, MadrasEnv):
    """MADRaS rllib Env wrapper."""

    def __init__(self, *args):
        MadrasEnv.__init__(self)

    def reset(self):
        return MadrasEnv.reset(self)

    def step(self, action_dict):
        return MadrasEnv.step(self, action_dict)


def on_episode_end(info):
    episode = info["episode"]
    rewards = episode.agent_rewards
    total_episode = episode.total_reward

    episode.custom_metrics["agent0/rew_2"] = rewards[('MadrasAgent_0', 'ppo_policy_0')]**2.0
    episode.custom_metrics["agent1/rew_2"] = rewards[('MadrasAgent_1', 'ppo_policy_1')]**2.0
    episode.custom_metrics["env_rew_2"] = total_episode**2.0


def on_sample_end(info):
    print(info.keys())
    sample = info["samples"]
    print(dir(sample))
    splits = sample.policy_batches['ppo_policy_0'].split_by_episode()
    print(len(splits))
    for split in splits:
        print("EPISODE= ", np.sum(split['rewards']))


parser = argparse.ArgumentParser()
parser.add_argument("--num-iters", type=int, default=300)

if __name__ == "__main__":
    args = parser.parse_args()
    ray.init()

    env = MadrasRllib()

    obs_spaces, action_spaces = [], []
    for agent in env.agents:
        obs_spaces.append(env.agents[agent].observation_space)
        action_spaces.append(env.agents[agent].action_space)

    print(obs_spaces)
    print(action_spaces)
    policies = {"ppo_policy_{}".format(i): (PPOTFPolicy, obs_spaces[i], action_spaces[i], {})
                for i in range(env.num_agents)}

    def policy_mapping_fn(agent_id):
        id = agent_id.split("_")[-1]
        return "ppo_policy_{}".format(id)

    ppo_trainer = PPOTrainer(
        env=MadrasRllib,
        config={
            "eager": False,
            "num_workers": 1,
            "num_gpus": 0,
            "vf_clip_param": 20,
            # "sample_batch_size": 20,  # set them accordingly
            "train_batch_size": 500,
            "callbacks": {
                "on_episode_end": on_episode_end,
                # "on_sample_end": on_sample_end,
            },
            # "lr": 5e-6,
            # "sgd_minibatch_size": 24,
            "multiagent": {
                "policies": policies,
                "policy_mapping_fn": policy_mapping_fn,
            },
        })

    # ppo_trainer.restore("{restore path}")

    for i in range(args.num_iters):
        print("== Iteration", i, "==")

        # save a checkpoint every 10 iterations
        if i % 10 == 0:
            checkpoint = ppo_trainer.save()
            print("checkpoint saved at", checkpoint)

        logging.warning("-- PPO --")
        # improve the PPO policy
        print(pretty_print(ppo_trainer.train()))
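policy_mapping_fn routes each MadrasAgent_<i> to its own ppo_policy_<i> by reusing the numeric suffix of the agent id, so every agent trains an independent PPO policy. A quick sanity check of the mapping (reusing the function defined above):

assert policy_mapping_fn("MadrasAgent_0") == "ppo_policy_0"
assert policy_mapping_fn("MadrasAgent_1") == "ppo_policy_1"

Training is then launched as, e.g., python MADRaS/agents/rllib/train_rllib_multi_agent.py --num-iters 300, checkpointing every 10 iterations as in the loop above.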
2 changes: 1 addition & 1 deletion MADRaS/envs/__init__.py
@@ -1,2 +1,2 @@
"""Env Import."""
from envs.gym_madras import MadrasEnv
from MADRaS.envs.gym_madras_v2 import MadrasEnv
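After this one-line change, the package-level import resolves to the v2 environment:

# both of these now refer to gym_madras_v2.MadrasEnv
from MADRaS.envs import MadrasEnv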
22 changes: 22 additions & 0 deletions MADRaS/envs/data/communications.yml
@@ -0,0 +1,22 @@
MadrasAgent_0:
  buff_size: 2
  vars:
    - action
    - speedX
    - speedY
    - track
  comms:
    - MadrasAgent_1
    - MadrasAgent_0


MadrasAgent_1:
  buff_size: 2
  vars:
    - action
    - speedX
    - speedY
    - trackPos
    - opponents
  comms:
    - MadrasAgent_0
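Each entry gives an agent its communication buffer depth (buff_size), the observation variables it publishes (vars), and the peers it listens to (comms); note that MadrasAgent_0 lists itself among its own peers. The comm module that consumes this file is not part of this diff; a minimal sketch of loading and inspecting it (PyYAML assumed, path relative to the repository root):

import yaml

with open("MADRaS/envs/data/communications.yml") as f:
    comm_cfg = yaml.safe_load(f)

for agent, cfg in comm_cfg.items():
    # e.g. MadrasAgent_0 -> buff_size 2, vars ['action', 'speedX', ...]
    print(agent, cfg["buff_size"], cfg["vars"], cfg["comms"])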