qfgaohao · tensorturtle · Jan 19, 2021 · Jan 19, 2021 · Jan 19, 2021
diff --git a/run_ssd_example.py b/run_ssd_example.py
@@ -57,11 +57,11 @@
 
 for i in range(boxes.size(0)):
     box = boxes[i, :]
-    cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 4)
+    cv2.rectangle(orig_image, (int(box[0].item()), int(box[1].item())), (int(box[2].item()), int(box[3].item())), (255, 255, 0), 4)
     #label = f"""{voc_dataset.class_names[labels[i]]}: {probs[i]:.2f}"""
     label = f"{class_names[labels[i]]}: {probs[i]:.2f}"
     cv2.putText(orig_image, label,
-                (box[0] + 20, box[1] + 40),
+                (int(box[0].item()) + 20, int(box[1].item()) + 40),
                 cv2.FONT_HERSHEY_SIMPLEX,
                 1,  # font scale
                 (255, 0, 255),

diff --git a/run_ssd_live_demo.py b/run_ssd_live_demo.py
@@ -18,9 +18,9 @@
 if len(sys.argv) >= 5:
     cap = cv2.VideoCapture(sys.argv[4])  # capture from file
 else:
-    cap = cv2.VideoCapture(0)   # capture from camera
-    cap.set(3, 1920)
-    cap.set(4, 1080)
+    cap = cv2.VideoCapture(2)   # capture from camera
+    cap.set(3, 1280)
+    cap.set(4, 720)
 
 class_names = [name.strip() for name in open(label_path).readlines()]
 num_classes = len(class_names)
@@ -73,10 +73,10 @@
     for i in range(boxes.size(0)):
         box = boxes[i, :]
         label = f"{class_names[labels[i]]}: {probs[i]:.2f}"
-        cv2.rectangle(orig_image, (box[0], box[1]), (box[2], box[3]), (255, 255, 0), 4)
+        cv2.rectangle(orig_image, (int(box[0].item()), int(box[1].item())), (int(box[2].item()), int(box[3].item())), (255, 255, 0), 4) #convert tensors into ints. Fixes "TypeError: function takes exactly 4 arguments (2 given)"
 
         cv2.putText(orig_image, label,
-                    (box[0]+20, box[1]+40),
+                    (int(box[0].item())+20, int(box[1].item())+40),
                     cv2.FONT_HERSHEY_SIMPLEX,
                     1,  # font scale
                     (255, 0, 255),

diff --git a/vision/utils/box_utils.py b/vision/utils/box_utils.py
@@ -97,6 +97,9 @@ def convert_locations_to_boxes(locations, priors, center_variance,
         boxes:  priors: [[center_x, center_y, h, w]]. All the values
             are relative to the image size.
     """
+    if torch.cuda.is_available():
+        locations = locations.cuda() #explicitly move tensor to cuda; fixes error "RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"
+
     # priors can have one dimension less.
     if priors.dim() + 1 == locations.dim():
         priors = priors.unsqueeze(0)