madras-simulator · rudrasohan · Jan 23, 2019 · Jan 23, 2019 · Jan 23, 2019 · Jan 23, 2019
diff --git a/MPE/gym_torcs.py b/MPE/gym_torcs.py
@@ -7,9 +7,6 @@
 import collections as col
 import os
 import time
-import random
-
-import theano
 
 class TorcsEnv:
     terminal_judge_start = 100      # If after 100 timestep still no progress, terminated
@@ -31,25 +28,26 @@ def __init__(self, vision=False, throttle=False, gear_change=False, main = False
 
         self.currState = None 
 
-        os.system("cd ~/vtorcs3 && ./torcs &")
+        #os.system("cd ~/vtorcs3 && ./torcs &")
+        os.system("torcs -nolaptime &")
         time.sleep(1.0)
         os.system(u'sh autostart.sh')
 
         # Now the action_space and observation_space are actually being used, just like in OpenAI's gym 
         if throttle is False:                           # Throttle is generally True
             self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,))
         else:
-            high = np.array([1., 1., 1.], dtype=theano.config.floatX)
-            low = np.array([-1., 0., 0.], dtype=theano.config.floatX)
+            high = np.array([1., 1., 1.], dtype=np.float32)
+            low = np.array([-1., 0., 0.], dtype=np.float32)
             self.action_space = spaces.Box(low=low, high=high)              # steer, accel, brake (according to agent_to_torcs() (check the function definition))
 
         if vision is False:                             # Vision is generally False
             high = np.inf*np.ones(self.obs_dim)
             low = -high
             self.observation_space = spaces.Box(low, high)
         else:
-            high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255], dtype=theano.config.floatX)
-            low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0], dtype=theano.config.floatX)
+            high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255], dtype=np.float32)
+            low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0], dtype=np.float32)
             self.observation_space = spaces.Box(low=low, high=high)			
 
     def terminate(self):
@@ -217,6 +215,9 @@ def get_obs(self):
     def reset_torcs(self):
         #print("relaunch torcs")		
         os.system('pkill torcs &')
+        os.system("torcs -nolaptime &")
+        time.sleep(1.0)
+        os.system(u'sh autostart.sh')
 
     def agent_to_torcs(self, u):
         torcs_action = {'steer': u[0]}

diff --git a/MPE/snakeoil3_gym.py b/MPE/snakeoil3_gym.py
@@ -168,7 +168,7 @@ def setup_connection(self):
             sockdata= str()
             try:
                 sockdata,addr= self.so.recvfrom(data_size)
-                sockdata = sockdata.decode(u'utf-8')
+                sockdata = sockdata.decode('utf-8')
             except socket.error as emsg:
                 print(u"Waiting for server on %d............" % self.port)
                 print(u"Count Down : " + str(n_fail))
@@ -244,8 +244,8 @@ def get_servers_input(self, step):
         while True:
             try:
                 # Receive server data
-                sockdata,addr= self.so.recvfrom(data_size)
-                sockdata = sockdata.decode(u'utf-8')
+                sockdata, addr = self.so.recvfrom(data_size)
+                sockdata = sockdata.decode('utf-8')
             except socket.error as emsg:
                 print(u'.')
                 print("Waiting for server data on %d.............." % self.port)
@@ -258,16 +258,16 @@ def get_servers_input(self, step):
 
                 n_fail -= 1
 
-            if u'***identified***' in sockdata:
+            if '***identified***' in sockdata:
                 print(u"Client connected on %d.............." % self.port)
                 continue
-            elif u'***shutdown***' in sockdata:
+            elif '***shutdown***' in sockdata:
                 print ((u"Server has stopped the race on %d. "+
                         u"You were in %d place.") %
                         (self.port,self.S.d[u'racePos']))
                 self.shutdown()
                 return -1
-            elif u'***restart***' in sockdata:
+            elif '***restart***' in sockdata:
                 # What do I do here?
                 print( u"Server has restarted the race on %d." % self.port)
                 # I haven't actually caught the server doing this.
@@ -581,7 +581,7 @@ def drive_example(c):
 
 # ================ MAIN ================
 if __name__ == u"__main__":
-    C= Client(p=3101)
+    C= Client(p=3001)
     for step in range(C.maxSteps,0,-1):
         C.get_servers_input()
         drive_example(C)

diff --git a/MPE/torcs_world.py b/MPE/torcs_world.py
@@ -9,7 +9,7 @@
 class Agent(object):
       def __init__(self,idx=0):
           self.idx = idx
-          self.port = 3101+self.idx
+          self.port = 3001+self.idx
           self.action_dim = 3
           self.state_dim = 65
           self.obs = []
@@ -40,6 +40,8 @@ def initialize_agents(self):
               agent = Agent(idx = i) 
               #agent.s_t = self.get_initial_observation(agent,0) #can remove step from here, can even remove the function
               self.agent_list.append(agent)
+
+          print("Agents created")
 
           ''' for i in range(self.n2):			#uncomment this for competitive agents
               agent = Agent(idx = i + self.n) 
@@ -56,10 +58,13 @@ def reset_agents(self):
               self.agent_list[i].client.R.d['meta'] = True
 
       def get_initial_observation(self, agent,step_count=0):
-          agent.client.get_servers_input(step_count)
+          #print("ENTERED FUNCTION#########################{}".format(agent.port))
+          out = agent.client.get_servers_input(step_count)
           obs = agent.client.S.d
+          #print("STATUS {} {}".format(out, agent.port))
           ob = self.env.make_observation(obs)
           agent.s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY,  ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm, ob.opponents))
+          print(agent.s_t.shape)
           return agent.s_t
 
       def update_agent_state(self, agent):  #this should be a function in agent class

diff --git a/MPE/train.py b/MPE/train.py
@@ -27,7 +27,7 @@ def parse_args():
     parser.add_argument("--num-units", type=int, default=300, help="number of units in the mlp")
     # Checkpointing
     parser.add_argument("--exp-name", type=str, default=None, help="name of the experiment")
-    parser.add_argument("--save-dir", type=str, default="/home/meha/maddpgCheckPoints", help="directory in which training state and model should be saved")
+    parser.add_argument("--save-dir", type=str, default="/tmp/MADRaS/maddpgCheckPoints", help="directory in which training state and model should be saved")
     parser.add_argument("--save-rate", type=int, default=300, help="save model once every time this many episodes are completed")
     parser.add_argument("--load-dir", type=str, default="", help="directory in which training state and model are loaded")
     # Evaluation
@@ -70,13 +70,15 @@ def train(arglist):
     with U.single_threaded_session():
         # create world
         world = World()
+        print("World Created")
 
         # Create environment
         env = MultiAgentTorcsEnv(world, 0, world.reset_world, world.reward, world.observation, done_callback = world.done) 
-
+        print("Env Created")
         # Create agent trainers
         obs_shape_n = [env.observation_space[i].shape for i in range(env.n)]
         num_adversaries = env.adv#min(env.n, arglist.num_adversaries)
+        print("Adversaries created")
         trainers = get_trainers(env, num_adversaries, obs_shape_n, arglist)
 
         print('Using good policy {} and adv policy {}'.format(arglist.good_policy, arglist.adv_policy))
@@ -100,9 +102,11 @@ def train(arglist):
 
         #todo : call reset function here 
         os.system("pkill torcs")
-        os.system("cd ~/vtorcs3 && ./torcs &") #use the location of torcs installation on your system
+        os.system("torcs -nolaptime &") #use the location of torcs installation on your system
+        print("TORCS STARTED")
         time.sleep(0.5)
         os.system('sh autostart.sh')
+        print("USED AUTOSTART")
         time.sleep(1)
 
         obs_n = []

diff --git a/MPE/train_maddpg.py b/MPE/train_maddpg.py
@@ -27,7 +27,7 @@ def parse_args():
     parser.add_argument("--num-units", type=int, default=300, help="number of units in the mlp")
     # Checkpointing
     parser.add_argument("--exp-name", type=str, default=None, help="name of the experiment")
-    parser.add_argument("--save-dir", type=str, default="/home/meha/maddpgCheckPoints", help="directory in which training state and model should be saved")
+    parser.add_argument("--save-dir", type=str, default="/tmp/MADRaS/maddpgCheckPoints", help="directory in which training state and model should be saved")
     parser.add_argument("--save-rate", type=int, default=300, help="save model once every time this many episodes are completed")
     parser.add_argument("--load-dir", type=str, default="", help="directory in which training state and model are loaded")
     # Evaluation
@@ -100,7 +100,8 @@ def train(arglist):
 
         #todo : call reset function here 
         os.system("pkill torcs")
-        os.system("cd ~/vtorcs3 && ./torcs &") #use the location of torcs installation on your system
+        #os.system("cd ~/vtorcs3 && ./torcs &") #use the location of torcs installation on your system
+        os.system("torcs -nolaptime &")  # '&' backgrounds TORCS; without it os.system blocks here until the simulator exits
         time.sleep(0.5)
         os.system('sh autostart.sh')
         time.sleep(1)