Move back to GPU

commaai · Sep 24, 2024 · fc4c5ab
1 parent 0f3f374
commit fc4c5ab
Show file tree

Hide file tree

Showing 5 changed files with 21 additions and 19 deletions.
diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py
@@ -101,15 +101,10 @@ def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_
     self.inputs['traffic_convention'][:] = inputs['traffic_convention']
     self.inputs['lateral_control_params'][:] = inputs['lateral_control_params']
 
-    new_img = self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs"))
-    self.input_imgs[:MODEL_FRAME_SIZE] = self.input_imgs[-MODEL_FRAME_SIZE:]
-    self.input_imgs[MODEL_FRAME_SIZE:] = new_img[:]
-    self.model.setInputBuffer("input_imgs", self.input_imgs.view(np.float32))
+    # when getCLBuffer returns a CL buffer, prepare() writes the frame into it and returns None; otherwise it returns the host-side frame buffer
+    self.model.setInputBuffer("input_imgs", self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs")))
     if wbuf is not None:
-      new_big_img = self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs"))
-      self.big_input_imgs[:MODEL_FRAME_SIZE] = self.big_input_imgs[-MODEL_FRAME_SIZE:]
-      self.big_input_imgs[MODEL_FRAME_SIZE:] = new_big_img[:]
-      self.model.setInputBuffer("big_input_imgs", self.big_input_imgs.view(np.float32))
+      self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs")))
 
 
     if prepare_only:

diff --git a/selfdrive/modeld/models/commonmodel.cc b/selfdrive/modeld/models/commonmodel.cc
@@ -7,7 +7,7 @@
 #include "common/clutil.h"
 
 ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
-  frame = std::make_unique<uint8_t[]>(MODEL_FRAME_SIZE);
+  input_frames = std::make_unique<uint8_t[]>(buf_size);
 
   q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
   y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
@@ -20,13 +20,19 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
 }
 
 uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
-  transform_queue(&this->transform, q,
-                  yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
-                  y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
-  loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
-  CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &frame[0], 0, nullptr, nullptr));
-  clFinish(q);
-  return &frame[0];
+  if (output == NULL) {
+    loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
+
+    std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(uint8_t) * MODEL_FRAME_SIZE);
+    CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
+    clFinish(q);
+    return &input_frames[0];
+  } else {
+    loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, *output, true);
+    // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
+    clFinish(q);
+    return NULL;
+  }
 }
 
 ModelFrame::~ModelFrame() {

diff --git a/selfdrive/modeld/models/commonmodel.h b/selfdrive/modeld/models/commonmodel.h
@@ -25,11 +25,12 @@ class ModelFrame {
   const int MODEL_WIDTH = 512;
   const int MODEL_HEIGHT = 256;
   const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
+  const int buf_size = MODEL_FRAME_SIZE * 2;
 
 private:
   Transform transform;
   LoadYUVState loadyuv;
   cl_command_queue q;
   cl_mem y_cl, u_cl, v_cl, net_input_cl;
-  std::unique_ptr<uint8_t[]> frame;
+  std::unique_ptr<uint8_t[]> input_frames;
 };
diff --git a/selfdrive/modeld/models/commonmodel.pxd b/selfdrive/modeld/models/commonmodel.pxd
@@ -13,6 +13,6 @@ cdef extern from "common/clutil.h":
 
 cdef extern from "selfdrive/modeld/models/commonmodel.h":
   cppclass ModelFrame:
-    int MODEL_FRAME_SIZE
+    int buf_size
     ModelFrame(cl_device_id, cl_context)
     unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
diff --git a/selfdrive/modeld/models/commonmodel_pyx.pyx b/selfdrive/modeld/models/commonmodel_pyx.pyx
@@ -42,4 +42,4 @@ cdef class ModelFrame:
       data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, output.mem)
     if not data:
       return None
-    return np.asarray(<cnp.uint8_t[:self.frame.MODEL_FRAME_SIZE]> data)
+    return np.asarray(<cnp.uint8_t[:self.frame.buf_size]> data)