runs ok

commaai · Sep 25, 2024 · ab2fa26
1 parent 4cc0969
commit ab2fa26
Show file tree

Hide file tree

Showing 6 changed files with 16 additions and 22 deletions.
diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py
@@ -65,11 +65,10 @@ def __init__(self, context: CLContext):
       'lateral_control_params': np.zeros(ModelConstants.LATERAL_CONTROL_PARAMS_LEN, dtype=np.float32),
       'prev_desired_curv': np.zeros(ModelConstants.PREV_DESIRED_CURV_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
       'features_buffer': np.zeros(ModelConstants.HISTORY_BUFFER_LEN * ModelConstants.FEATURE_LEN, dtype=np.float32),
+      'input_imgs': np.zeros(MODEL_FRAME_SIZE*2, dtype=np.uint8),
+      'big_input_imgs': np.zeros(MODEL_FRAME_SIZE*2, dtype=np.uint8),
     }
 
-    self.input_imgs = np.zeros(MODEL_FRAME_SIZE*2, dtype=np.uint8)
-    self.big_input_imgs = np.zeros(MODEL_FRAME_SIZE*2, dtype=np.uint8)
-
     with open(METADATA_PATH, 'rb') as f:
       model_metadata = pickle.load(f)
 
@@ -79,8 +78,6 @@ def __init__(self, context: CLContext):
     self.parser = Parser()
 
     self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.GPU, False, context)
-    self.model.addInput("input_imgs", None)
-    self.model.addInput("big_input_imgs", None)
     for k,v in self.inputs.items():
       self.model.addInput(k, v)
 
@@ -102,14 +99,10 @@ def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_
     self.inputs['lateral_control_params'][:] = inputs['lateral_control_params']
 
     new_img = self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs"))
-    self.input_imgs[:MODEL_FRAME_SIZE] = self.input_imgs[-MODEL_FRAME_SIZE:]
-    self.input_imgs[MODEL_FRAME_SIZE:] = new_img[:]
-    self.model.setInputBuffer("input_imgs", self.input_imgs.view(np.float32))
+    self.model.setInputBuffer("input_imgs", new_img)
     if wbuf is not None:
       new_big_img = self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs"))
-      self.big_input_imgs[:MODEL_FRAME_SIZE] = self.big_input_imgs[-MODEL_FRAME_SIZE:]
-      self.big_input_imgs[MODEL_FRAME_SIZE:] = new_big_img[:]
-      self.model.setInputBuffer("big_input_imgs", self.big_input_imgs.view(np.float32))
+      self.model.setInputBuffer("big_input_imgs", new_big_img)
 
 
     if prepare_only:

diff --git a/selfdrive/modeld/models/commonmodel.cc b/selfdrive/modeld/models/commonmodel.cc
@@ -7,7 +7,7 @@
 #include "common/clutil.h"
 
 ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
-  frame = std::make_unique<uint8_t[]>(MODEL_FRAME_SIZE);
+  input_frames = std::make_unique<uint8_t[]>(buf_size);
 
   q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
   y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
@@ -20,13 +20,12 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
 }
 
 uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
-  transform_queue(&this->transform, q,
-                  yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
-                  y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
-  loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
-  CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &frame[0], 0, nullptr, nullptr));
-  clFinish(q);
-  return &frame[0];
+    loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
+
+    std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(uint8_t) * MODEL_FRAME_SIZE);
+    CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
+    clFinish(q);
+    return &input_frames[0];
 }
 
 ModelFrame::~ModelFrame() {

diff --git a/selfdrive/modeld/models/commonmodel.h b/selfdrive/modeld/models/commonmodel.h
@@ -25,11 +25,12 @@ class ModelFrame {
   const int MODEL_WIDTH = 512;
   const int MODEL_HEIGHT = 256;
   const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2;
+  const int buf_size = MODEL_FRAME_SIZE * 2;
 
 private:
   Transform transform;
   LoadYUVState loadyuv;
   cl_command_queue q;
   cl_mem y_cl, u_cl, v_cl, net_input_cl;
-  std::unique_ptr<uint8_t[]> frame;
+  std::unique_ptr<uint8_t[]> input_frames;
 };
diff --git a/selfdrive/modeld/models/commonmodel.pxd b/selfdrive/modeld/models/commonmodel.pxd
@@ -13,6 +13,6 @@ cdef extern from "common/clutil.h":
 
 cdef extern from "selfdrive/modeld/models/commonmodel.h":
   cppclass ModelFrame:
-    int MODEL_FRAME_SIZE
+    int buf_size
     ModelFrame(cl_device_id, cl_context)
     unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
diff --git a/selfdrive/modeld/models/commonmodel_pyx.pyx b/selfdrive/modeld/models/commonmodel_pyx.pyx
@@ -42,4 +42,4 @@ cdef class ModelFrame:
       data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, output.mem)
     if not data:
       return None
-    return np.asarray(<cnp.uint8_t[:self.frame.MODEL_FRAME_SIZE]> data)
+    return np.asarray(<cnp.uint8_t[:self.frame.buf_size]> data)
diff --git a/selfdrive/modeld/runners/runmodel_pyx.pyx b/selfdrive/modeld/runners/runmodel_pyx.pyx
@@ -5,6 +5,7 @@ from libcpp.string cimport string
 
 from .runmodel cimport USE_CPU_RUNTIME, USE_GPU_RUNTIME, USE_DSP_RUNTIME
 from selfdrive.modeld.models.commonmodel_pyx cimport CLMem
+import numpy as np
 
 class Runtime:
   CPU = USE_CPU_RUNTIME