legolas123 · gouthamvgk · Feb 24, 2019 · Feb 24, 2019
diff --git a/Tensorflow-tutorials/object_tracking_rolo/README.md b/Tensorflow-tutorials/object_tracking_rolo/README.md
@@ -0,0 +1,34 @@
+# ROLO
+
+This repo contains the code for object tracking using the ROLO network.
+ROLO uses a YOLO network for object detection and an LSTM for sequential processing, so we need pretrained weights for both YOLO and the LSTM.
+
+Clone the repo and run the files as mentioned below.
+
+Download and extract the pretrained weights using the download.sh script:
+
+> `sh download.sh`
+
+ROLO can be run in two different modes: the input can be either a video file or a folder containing all the frames of the video.
+To test the ROLO network, you can use the sample videos in the sample_videos folder or provide your own video files.
+
+ - To run ROLO with a video file, execute the following command from repo directory
+
+
+
+>  python ROLO_test.py --path PATH_TO_VIDEO --video
+
+where PATH_TO_VIDEO is the path to video file
+
+For example, to run with one of the sample videos,
+
+> python ROLO_test.py --path sample_videos/test_video1.mp4 --video
+
+ - To run ROLO with frames of a video, execute the following command from repo directory
+
+> python ROLO_test.py --path PATH_TO_FRAMES
+
+where PATH_TO_FRAMES is the path to the folder containing the frames of the video
+
+The script first runs the YOLO network and then the ROLO network. The predictions are visualized at run time. When the script completes, a tracking video is created in the **output** folder, which is located in the same directory as the input video or the input frames folder.
+
diff --git a/Tensorflow-tutorials/object_tracking_rolo/ROLO_network.py b/Tensorflow-tutorials/object_tracking_rolo/ROLO_network.py
@@ -0,0 +1,136 @@
+import sys
+import ROLO_utils as utils
+import tensorflow as tf
+import cv2
+
+import numpy as np
+import os
+import time
+import random
+class ROLO():
+    disp_console = True
+    restore_weights = True
+    num_steps = 3
+    num_feat = 4096
+    num_predict = 6 # final output of LSTM 6 loc parameters
+    num_gt = 4
+    num_input = num_feat + num_predict # data input: 4096+6= 5002
+    rolo_weights_file = os.path.join(os.getcwd(), 'checkpoint', 'demo3.ckpt')
+
+    batch_size = 1
+    display_step = 1
+
+    def __init__(self, path1, video = True):
+
+        print("Initialising ROLO")
+        self.x = tf.placeholder("float32", [None, self.num_steps, self.num_input])
+        self.y = tf.placeholder("float32", [None, self.num_gt])
+        if video:
+            self.path = os.path.dirname(path1)
+            self.load_config(path1, video = True)
+        else:
+            self.path = os.path.split(path1)[0]
+            self.load_config(path1, video = False)
+        self.rolo_utils = utils.ROLO_utils()
+        self.output_path = os.path.join(self.path, 'rolo_out_test')
+        utils.createFolder(self.output_path)
+        self.build_networks()
+
+    def run_net(self):
+        start_time = time.time()
+        self.testing(os.path.join(self.path, 'yolo_out'))
+        elapsed_time = time.time() - start_time
+        print('ROLO network executed in {:.0f} minutes {:.0f} seconds'.format(elapsed_time//60,elapsed_time%60))
+        return self.w_img, self.h_img, self.num_steps
+
+    def load_config(self, path, video):
+        if video:
+            data = cv2.VideoCapture(path)
+            self.testing_iters = int(data.get(cv2.CAP_PROP_FRAME_COUNT))
+            _,img = data.read()
+            self.h_img, self.w_img, _ = img.shape
+        else:
+            temp = next(os.walk(path))[2]
+            self.testing_iters = len(temp)
+            img = cv2.imread(os.path.join(path, temp[2]))
+            self.h_img, self.w_img, _ = img.shape
+
+
+    def LSTM_single(self,_X):
+
+        # input shape: (batch_size, n_steps, n_input)
+        _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
+        # Reshape to prepare input to hidden activation
+        _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input]) # (num_steps*batch_size, num_input)
+        # Split data because rnn cell needs a list of inputs for the RNN inner loop
+        _X = tf.split(_X, self.num_steps, 0) # n_steps * (batch_size, num_input)
+        cell = tf.nn.rnn_cell.LSTMCell(self.num_input, name='basic_lstm_cell')
+        state = cell.zero_state(self.batch_size, dtype=tf.float32)
+        outputs, state = tf.nn.static_rnn(cell, _X, initial_state=state, dtype=tf.float32)
+        tf.get_variable_scope().reuse_variables()
+        return outputs
+
+
+    def build_networks(self):
+        if self.disp_console : print("Building ROLO graph...")
+
+        # Build rolo layers
+        self.lstm_module = self.LSTM_single(self.x)
+        self.sess = tf.Session()
+        self.sess.run(tf.global_variables_initializer())
+        self.saver = tf.train.Saver()
+        if self.disp_console : print("Loading complete!" + '\n')
+
+
+    def testing(self, x_path):
+
+        print("TESTING ROLO...")
+        # Use rolo_input for LSTM training
+        pred = self.LSTM_single(self.x)
+        print("pred: ", pred)
+        self.pred_location = pred[-1][:, 4097:4101]
+        print("pred_location: ", self.pred_location)
+        init = tf.global_variables_initializer()
+
+        # Launch the graph
+        with tf.Session() as sess:
+
+            if (self.restore_weights == True):
+                sess.run(init)
+                self.saver.restore(sess, self.rolo_weights_file)
+                print("Loading complete!" + '\n')
+            else:
+                sess.run(init)
+
+
+            id = 0
+            total_time = 0.0
+
+
+            # Keep training until reach max iterations
+            while id < self.testing_iters - self.num_steps:
+                ti = time.time()
+                # Load training data & ground truth
+                batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, id)
+
+                # Reshape data to get 3 seq of 5002 elements
+                batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input])
+
+                pred_location= sess.run(self.pred_location,feed_dict={self.x: batch_xs})
+                print('Image_no{}'.format(id+1))
+                print("ROLO Pred: ", pred_location)
+                print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img)
+
+                # Save pred_location to file
+                utils.save_rolo_output_test(self.output_path, pred_location, id, self.num_steps, self.batch_size)
+
+                if id % self.display_step == 0:
+                    cycle_time = time.time()-ti
+                    total_time += cycle_time
+                id += 1
+                print(cycle_time)
+
+            print("ROLO network executed")
+            print(total_time)
+        return None
+
diff --git a/Tensorflow-tutorials/object_tracking_rolo/ROLO_test.py b/Tensorflow-tutorials/object_tracking_rolo/ROLO_test.py
@@ -0,0 +1,75 @@
+
+import sys
+import ROLO_utils as utils
+from YOLO_network import YOLO
+from ROLO_network import ROLO
+import tensorflow as tf
+import cv2
+import argparse
+import numpy as np
+import os
+import time
+import random
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-p" , "--path", required = True, type = str, help = "path to video or image folder")
+parser.add_argument("-v", '--video',action='store_true')
+args = vars(parser.parse_args())
+start_time=time.time()
+video = args['video']
+path1 = args['path']
+if video:
+    path = os.path.dirname(path1)
+    img_fold_path = os.path.join(path, 'img')
+else:
+    path = os.path.split(path1)[0]
+    img_fold_path = path1
+
+yolo = YOLO(path1, video=video)
+yolo.run_net()
+
+
+tf.reset_default_graph()
+rolo = ROLO(path1, video = video)
+width, height, num_steps = rolo.run_net()
+
+yolo_out_path= os.path.join(path, 'yolo_out')
+rolo_out_path= os.path.join(path, 'rolo_out_test')
+
+paths_imgs = utils.load_folder(img_fold_path)
+paths_rolo= utils.load_folder(rolo_out_path)
+
+utils.createFolder(os.path.join(path, 'output/frames'))
+utils.createFolder(os.path.join(path, 'output/videos'))
+
+fourcc= cv2.VideoWriter_fourcc(*'DIVX')
+video_name = 'test_video.avi'
+video_path = os.path.join(os.path.join(path, 'output/videos'), video_name)
+video = cv2.VideoWriter(video_path, fourcc, 20, (width, height))
+
+for i in range(len(paths_rolo)- num_steps):
+        id= i + 1
+        test_id= id + num_steps - 2  #* num_steps + 1
+
+        path2 = paths_imgs[test_id]
+        img = utils.file_to_img(path2)
+
+        if(img is None): break
+
+        yolo_location= utils.find_yolo_location(yolo_out_path, test_id)
+        yolo_location= utils.locations_normal( width, height, yolo_location)
+        print(yolo_location)
+
+        rolo_location= utils.find_rolo_location( rolo_out_path, test_id)
+        rolo_location = utils.locations_normal( width, height, rolo_location)
+        print(rolo_location)
+
+        frame = utils.debug_2_locations( img, rolo_location, yolo_location)
+        video.write(frame)
+
+        frame_name= os.path.join(os.path.join(path, 'output/frames'),str(test_id)+'.jpg')
+        cv2.imwrite(frame_name, frame)
+
+video.release()
+cv2.destroyAllWindows()
+