Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added the object tracking code under tensorflow tutorials #19

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions Tensorflow-tutorials/object_tracking_rolo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# ROLO

This repo contains the code for object tracking using the ROLO network.
ROLO uses the YOLO network for object detection and an LSTM for sequential processing, so pretrained weights are needed for both YOLO and the LSTM.

Clone the repo and run the files as mentioned below.

Download the pretrained weights and extract them using the `download.sh` script:

> `sh download.sh`

ROLO can be run in two different modes: the input can be either a video file or a folder containing all the frames of the video.
To test the ROLO network, you can use the sample videos in the sample_videos folder or provide your own video files.

- To run ROLO with a video file, execute the following command from repo directory



> python ROLO_test.py --path PATH_TO_VIDEO --video

where PATH_TO_VIDEO is the path to video file

For example, to run with one of the sample videos,

> python ROLO_test.py --path sample_videos/test_video1.mp4 --video

- To run ROLO with frames of a video, execute the following command from repo directory

> python ROLO_test.py --path PATH_TO_FRAMES

where PATH_TO_FRAMES is the path to the folder containing the frames of the video

The script first runs the YOLO network and then the ROLO network. The predictions are displayed while the script is running. When the script completes, a tracking video is created in the **output** folder, which is located in the same directory as the input video or the input frames folder.

136 changes: 136 additions & 0 deletions Tensorflow-tutorials/object_tracking_rolo/ROLO_network.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import sys
import ROLO_utils as utils
import tensorflow as tf
import cv2

import numpy as np
import os
import time
import random
class ROLO():
    """LSTM stage of the ROLO tracking pipeline.

    Builds a single LSTM over windows of ``num_steps`` consecutive input
    vectors of size ``num_input``, restores pretrained weights from
    ``rolo_weights_file`` and writes one predicted location per window to
    ``<input parent dir>/rolo_out_test``. The input vectors are read from
    ``<input parent dir>/yolo_out`` through ``ROLO_utils``.
    """
    disp_console = True
    restore_weights = True
    num_steps = 3  # sequence length fed to the LSTM
    num_feat = 4096
    num_predict = 6  # final output of LSTM 6 loc parameters
    num_gt = 4
    num_input = num_feat + num_predict  # data input: 4096 + 6 = 4102
    rolo_weights_file = os.path.join(os.getcwd(), 'checkpoint', 'demo3.ckpt')

    batch_size = 1
    display_step = 1

    def __init__(self, path1, video = True):
        """Create the placeholders, read the frame geometry and build the graph.

        Args:
            path1: Path to the input video file (``video`` true) or to the
                folder containing the frames (``video`` false).
            video: Whether ``path1`` points to a video file.

        Raises:
            ValueError: If no frame can be read from ``path1``.
        """
        print("Initialising ROLO")
        self.x = tf.placeholder("float32", [None, self.num_steps, self.num_input])
        self.y = tf.placeholder("float32", [None, self.num_gt])
        # os.path.dirname(p) is exactly os.path.split(p)[0], so both input
        # modes share the same parent-directory computation.
        self.path = os.path.dirname(path1)
        self.load_config(path1, video=video)
        self.rolo_utils = utils.ROLO_utils()
        self.output_path = os.path.join(self.path, 'rolo_out_test')
        utils.createFolder(self.output_path)
        self.build_networks()

    def run_net(self):
        """Run the LSTM over ``<path>/yolo_out`` and report the elapsed time.

        Returns:
            Tuple ``(w_img, h_img, num_steps)``: frame width, frame height and
            the LSTM sequence length.
        """
        start_time = time.time()
        self.testing(os.path.join(self.path, 'yolo_out'))
        elapsed_time = time.time() - start_time
        print('ROLO network executed in {:.0f} minutes {:.0f} seconds'.format(elapsed_time//60,elapsed_time%60))
        return self.w_img, self.h_img, self.num_steps

    def load_config(self, path, video):
        """Read the number of frames and the frame size of the input.

        Sets ``self.testing_iters`` (frame count), ``self.h_img`` and
        ``self.w_img``.

        Args:
            path: Video file path, or folder containing the frames.
            video: Whether ``path`` is a video file.

        Raises:
            ValueError: If the video yields no frame, the folder contains no
                files, or none of its files can be decoded as an image.
        """
        if video:
            data = cv2.VideoCapture(path)
            try:
                self.testing_iters = int(data.get(cv2.CAP_PROP_FRAME_COUNT))
                ok, img = data.read()
            finally:
                # Only the first frame is needed here; free the capture handle.
                data.release()
            if not ok or img is None:
                raise ValueError('Could not read a frame from video: {}'.format(path))
        else:
            # Sort so the probed frame does not depend on directory order.
            frames = sorted(next(os.walk(path), (path, [], []))[2])
            if not frames:
                raise ValueError('No frames found in folder: {}'.format(path))
            self.testing_iters = len(frames)
            img = None
            # Probe the first decodable file instead of a fixed index, which
            # failed on folders holding fewer than three files.
            for name in frames:
                img = cv2.imread(os.path.join(path, name))
                if img is not None:
                    break
            if img is None:
                raise ValueError('No readable image found in folder: {}'.format(path))
        self.h_img, self.w_img = img.shape[:2]

    def LSTM_single(self, _X):
        """Build the LSTM over the input placeholder and return its outputs.

        Args:
            _X: Tensor of shape (batch_size, num_steps, num_input).

        Returns:
            List of ``num_steps`` output tensors, one per time step.
        """
        # input shape: (batch_size, n_steps, n_input)
        _X = tf.transpose(_X, [1, 0, 2])  # permute num_steps and batch_size
        # Reshape to prepare input to hidden activation
        _X = tf.reshape(_X, [self.num_steps * self.batch_size, self.num_input])  # (num_steps*batch_size, num_input)
        # Split data because rnn cell needs a list of inputs for the RNN inner loop
        _X = tf.split(_X, self.num_steps, 0)  # n_steps * (batch_size, num_input)
        cell = tf.nn.rnn_cell.LSTMCell(self.num_input, name='basic_lstm_cell')
        state = cell.zero_state(self.batch_size, dtype=tf.float32)
        outputs, state = tf.nn.static_rnn(cell, _X, initial_state=state, dtype=tf.float32)
        # Later calls (see testing) must share the variables created here.
        tf.get_variable_scope().reuse_variables()
        return outputs

    def build_networks(self):
        """Create the LSTM graph, an initialised session and the checkpoint saver."""
        if self.disp_console : print("Building ROLO graph...")

        # Build rolo layers
        self.lstm_module = self.LSTM_single(self.x)
        # NOTE(review): testing() opens its own session, so this one is not
        # used for inference in this class; it is kept because self.sess is a
        # public attribute. Consider dropping it if nothing else relies on it.
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        if self.disp_console : print("Loading complete!" + '\n')

    def testing(self, x_path):
        """Predict one location per input window and save it to disk.

        Args:
            x_path: Folder passed to ``ROLO_utils.load_yolo_output_test`` as
                the source of the per-frame input vectors.
        """
        print("TESTING ROLO...")
        # Use rolo_input for LSTM training
        pred = self.LSTM_single(self.x)
        print("pred: ", pred)
        # Columns num_feat+1 .. num_feat+num_gt of the last time step
        # (4097:4101 with the default sizes).
        # NOTE(review): presumably the box coordinates that follow the first
        # appended detection field - confirm against ROLO_utils.
        loc_start = self.num_feat + 1
        self.pred_location = pred[-1][:, loc_start:loc_start + self.num_gt]
        print("pred_location: ", self.pred_location)
        init = tf.global_variables_initializer()

        # Launch the graph
        with tf.Session() as sess:
            sess.run(init)
            if self.restore_weights:
                self.saver.restore(sess, self.rolo_weights_file)
                print("Loading complete!" + '\n')

            frame_idx = 0
            total_time = 0.0

            # One prediction per window of num_steps consecutive inputs.
            while frame_idx < self.testing_iters - self.num_steps:
                ti = time.time()
                # Load the input window starting at frame_idx
                batch_xs = self.rolo_utils.load_yolo_output_test(x_path, self.batch_size, self.num_steps, frame_idx)

                # Reshape data to get num_steps sequences of num_input elements
                batch_xs = np.reshape(batch_xs, [self.batch_size, self.num_steps, self.num_input])

                pred_location = sess.run(self.pred_location, feed_dict={self.x: batch_xs})
                print('Image_no{}'.format(frame_idx+1))
                print("ROLO Pred: ", pred_location)
                print("ROLO Pred in pixel: ", pred_location[0][0]*self.w_img, pred_location[0][1]*self.h_img, pred_location[0][2]*self.w_img, pred_location[0][3]*self.h_img)

                # Save pred_location to file
                utils.save_rolo_output_test(self.output_path, pred_location, frame_idx, self.num_steps, self.batch_size)

                # Time every iteration so the total stays correct and the
                # counter always advances, whatever display_step is.
                cycle_time = time.time() - ti
                total_time += cycle_time
                if frame_idx % self.display_step == 0:
                    print(cycle_time)
                frame_idx += 1

        print("ROLO network executed")
        print(total_time)

75 changes: 75 additions & 0 deletions Tensorflow-tutorials/object_tracking_rolo/ROLO_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@

import sys
import ROLO_utils as utils
from YOLO_network import YOLO
from ROLO_network import ROLO
import tensorflow as tf
import cv2
import argparse
import numpy as np
import os
import time
import random

# Command-line driver: run YOLO, then ROLO, on a video file or a folder of
# frames, and render both predicted locations onto the frames and into a video.
parser = argparse.ArgumentParser()
parser.add_argument("-p" , "--path", required = True, type = str, help = "path to video or image folder")
parser.add_argument("-v", '--video',action='store_true')
args = vars(parser.parse_args())
video = args['video']
path1 = args['path']

# All intermediate and final outputs live next to the input.
# os.path.dirname(p) is exactly os.path.split(p)[0].
path = os.path.dirname(path1)
# NOTE(review): for video input the frames are read from <path>/img,
# presumably extracted there by the YOLO stage - confirm in YOLO_network.
img_fold_path = os.path.join(path, 'img') if video else path1

yolo = YOLO(path1, video=video)
yolo.run_net()

# Start from an empty graph before the ROLO graph is built.
tf.reset_default_graph()
rolo = ROLO(path1, video = video)
width, height, num_steps = rolo.run_net()

yolo_out_path = os.path.join(path, 'yolo_out')
rolo_out_path = os.path.join(path, 'rolo_out_test')

paths_imgs = utils.load_folder(img_fold_path)
paths_rolo = utils.load_folder(rolo_out_path)

frames_dir = os.path.join(path, 'output/frames')
videos_dir = os.path.join(path, 'output/videos')
utils.createFolder(frames_dir)
utils.createFolder(videos_dir)

fourcc = cv2.VideoWriter_fourcc(*'DIVX')
video_name = 'test_video.avi'
video_path = os.path.join(videos_dir, video_name)
# Use a dedicated name so the boolean `video` flag is not overwritten.
writer = cv2.VideoWriter(video_path, fourcc, 20, (width, height))

try:
    for i in range(len(paths_rolo) - num_steps):
        # Index used to look up the image and the YOLO/ROLO outputs;
        # equal to (i + 1) + num_steps - 2.
        test_id = i + num_steps - 1

        img = utils.file_to_img(paths_imgs[test_id])
        if img is None:
            break

        yolo_location = utils.find_yolo_location(yolo_out_path, test_id)
        yolo_location = utils.locations_normal(width, height, yolo_location)
        print(yolo_location)

        rolo_location = utils.find_rolo_location(rolo_out_path, test_id)
        rolo_location = utils.locations_normal(width, height, rolo_location)
        print(rolo_location)

        frame = utils.debug_2_locations(img, rolo_location, yolo_location)
        writer.write(frame)

        frame_name = os.path.join(frames_dir, str(test_id)+'.jpg')
        cv2.imwrite(frame_name, frame)
finally:
    # Finalise the video file and close any windows even if a frame fails.
    writer.release()
    cv2.destroyAllWindows()

Loading