I wrote a webcam demo for this, but I cannot get the 120 fps reported in the paper. Can anyone help me?
('cap read frame time : ', 0.03454303741455078)
('detect time: ', 0.1441190242767334)
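For reference, a detect time of 0.144 s works out to roughly 1 / 0.144 ≈ 7 fps, far below 120 fps.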
This is my code:
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import caffe
import cv2
from google.protobuf import text_format
from caffe.proto import caffe_pb2
caffe.set_mode_gpu()
# Load the PASCAL VOC labels.
labelmap_file = 'model/voc/labelmap_voc.prototxt'
file = open(labelmap_file, 'r')
labelmap = caffe_pb2.LabelMap()
text_format.Merge(str(file.read()), labelmap)

def get_labelname(labelmap, labels):
    num_labels = len(labelmap.item)
    labelnames = []
    if type(labels) is not list:
        labels = [labels]
    for label in labels:
        found = False
        for i in xrange(0, num_labels):
            if label == labelmap.item[i].label:
                found = True
                labelnames.append(labelmap.item[i].display_name)
                break
        assert found == True
    return labelnames
model_def = 'model/voc/deploy_merged.prototxt'
model_weights = 'model/voc/pelee_merged.caffemodel'

net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)
# Input preprocessing: 'data' is the name of the input blob == net.inputs[0].
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))
transformer.set_input_scale('data', 0.017)
transformer.set_mean('data', np.array([103.94,116.78,123.68])) # mean pixel
transformer.set_raw_scale('data', 255) # the reference model operates on images in [0,255] range instead of [0,1]
transformer.set_channel_swap('data', (2,1,0)) # the reference model has channels in BGR order instead of RGB
font = cv2.FONT_HERSHEY_SIMPLEX
def open_cam_onboard(width, height):
    # Use the Jetson onboard camera.
    # On versions of L4T prior to 28.1, use flip-method=2.
    gst_str = ("nvcamerasrc ! "
               "video/x-raw(memory:NVMM), width=(int)800, height=(int)600, "
               "format=(string)I420, framerate=(fraction)5/1 ! "
               "nvvidconv ! video/x-raw, width=(int){}, height=(int){}, format=(string)BGRx ! "
               "videoconvert ! appsink").format(width, height)
    return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)
def do_detect(image, img2):
    transformed_image = transformer.preprocess('data', image)
    net.blobs['data'].data[...] = transformed_image
    # Forward pass.
    detections = net.forward()['detection_out']
    # Parse the outputs.
    det_label = detections[0, 0, :, 1]
    det_conf = detections[0, 0, :, 2]
    det_xmin = detections[0, 0, :, 3]
    det_ymin = detections[0, 0, :, 4]
    det_xmax = detections[0, 0, :, 5]
    det_ymax = detections[0, 0, :, 6]
    # Keep detections with confidence higher than 0.4.
    top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.4]
    top_conf = det_conf[top_indices]
    top_label_indices = det_label[top_indices].tolist()
    top_labels = get_labelname(labelmap, top_label_indices)
    top_xmin = det_xmin[top_indices]
    top_ymin = det_ymin[top_indices]
    top_xmax = det_xmax[top_indices]
    top_ymax = det_ymax[top_indices]
    for i in xrange(top_conf.shape[0]):
        xmin = int(round(top_xmin[i] * image.shape[1]))
        ymin = int(round(top_ymin[i] * image.shape[0]))
        xmax = int(round(top_xmax[i] * image.shape[1]))
        ymax = int(round(top_ymax[i] * image.shape[0]))
        score = top_conf[i]
        label_name = top_labels[i]
        img2 = cv2.rectangle(img2, (xmin, ymin), (xmax, ymax), (0, 255, 0))
        img2 = cv2.putText(img2, label_name + ':' + str(score), (xmin, ymin - 5), font, 2, (0, 0, 255), 1)
    return img2

# Set the net to batch size 1.
image_resize = 304
net.blobs['data'].reshape(1, 3, image_resize, image_resize)

capture = open_cam_onboard(800, 600)
while True:
    time0 = time.time()
    ret, frame = capture.read()
    frame1 = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame1 = frame1 / 255.
    print("cap read frame time : ", time.time() - time0)
    time1 = time.time()
    res = do_detect(frame1, frame)
    print("detect time: ", time.time() - time1)
    cv2.imshow('frame', res)
    if cv2.waitKey(1) == ord('q'):
        break
What is the FPS you are getting?
In the paper it's mentioned:
The speed is calculated by the average time of processing 100 pictures with 1 batch size.
We run 100 picture processing for 10 times separately and average the time.
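For comparison, here is a minimal sketch of that protocol (assuming the `net` object from the demo above): it times only `net.forward()` with batch size 1 over 100 images, repeated 10 times, and deliberately excludes capture, preprocessing, and drawing.

# Minimal sketch of the paper's timing protocol, assuming the `net`
# object from the demo above. Only the forward pass is timed: batch
# size 1, 100 images per run, averaged over 10 runs.
import time
import numpy as np

net.blobs['data'].reshape(1, 3, 304, 304)
dummy = np.random.rand(1, 3, 304, 304).astype(np.float32)

run_times = []
for run in xrange(10):
    start = time.time()
    for _ in xrange(100):
        net.blobs['data'].data[...] = dummy
        net.forward()
    run_times.append((time.time() - start) / 100.0)

avg = sum(run_times) / len(run_times)
print("avg forward time: %.4f s (%.1f FPS)" % (avg, 1.0 / avg))

If this still comes out well below 120 FPS, the gap is in the network itself rather than in the capture or preprocessing code.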
The paper also uses FP16 instead of FP32 to achieve the reported FPS.
It would be great if the author could confirm this.