diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py index 806ee3de545169..c22d546be5ef99 100755 --- a/selfdrive/modeld/modeld.py +++ b/selfdrive/modeld/modeld.py @@ -101,15 +101,10 @@ def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_ self.inputs['traffic_convention'][:] = inputs['traffic_convention'] self.inputs['lateral_control_params'][:] = inputs['lateral_control_params'] - new_img = self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs")) - self.input_imgs[:MODEL_FRAME_SIZE] = self.input_imgs[-MODEL_FRAME_SIZE:] - self.input_imgs[MODEL_FRAME_SIZE:] = new_img[:] - self.model.setInputBuffer("input_imgs", self.input_imgs.view(np.float32)) + # if getCLBuffer is not None, frame will be None + self.model.setInputBuffer("input_imgs", self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs"))) if wbuf is not None: - new_big_img = self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs")) - self.big_input_imgs[:MODEL_FRAME_SIZE] = self.big_input_imgs[-MODEL_FRAME_SIZE:] - self.big_input_imgs[MODEL_FRAME_SIZE:] = new_big_img[:] - self.model.setInputBuffer("big_input_imgs", self.big_input_imgs.view(np.float32)) + self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs"))) if prepare_only: diff --git a/selfdrive/modeld/models/commonmodel.cc b/selfdrive/modeld/models/commonmodel.cc index 40134b8acab2c2..b2b73cedf361dd 100644 --- a/selfdrive/modeld/models/commonmodel.cc +++ b/selfdrive/modeld/models/commonmodel.cc @@ -7,7 +7,7 @@ #include "common/clutil.h" ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) { - frame = std::make_unique(MODEL_FRAME_SIZE); + input_frames = std::make_unique(buf_size); q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err)); y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err)); @@ -20,13 +20,19 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) { } uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) { - transform_queue(&this->transform, q, - yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset, - y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection); - loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl); - CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &frame[0], 0, nullptr, nullptr)); - clFinish(q); - return &frame[0]; + if (output == NULL) { + loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl); + + std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(uint8_t) * MODEL_FRAME_SIZE); + CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr)); + clFinish(q); + return &input_frames[0]; + } else { + loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, *output, true); + // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready. + clFinish(q); + return NULL; + } } ModelFrame::~ModelFrame() { diff --git a/selfdrive/modeld/models/commonmodel.h b/selfdrive/modeld/models/commonmodel.h index aa98dadcad78d1..ea394666709f28 100644 --- a/selfdrive/modeld/models/commonmodel.h +++ b/selfdrive/modeld/models/commonmodel.h @@ -25,11 +25,12 @@ class ModelFrame { const int MODEL_WIDTH = 512; const int MODEL_HEIGHT = 256; const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 / 2; + const int buf_size = MODEL_FRAME_SIZE * 2; private: Transform transform; LoadYUVState loadyuv; cl_command_queue q; cl_mem y_cl, u_cl, v_cl, net_input_cl; - std::unique_ptr frame; + std::unique_ptr input_frames; }; diff --git a/selfdrive/modeld/models/commonmodel.pxd b/selfdrive/modeld/models/commonmodel.pxd index af79e815663599..3348af3f174665 100644 --- a/selfdrive/modeld/models/commonmodel.pxd +++ b/selfdrive/modeld/models/commonmodel.pxd @@ -13,6 +13,6 @@ cdef extern from "common/clutil.h": cdef extern from "selfdrive/modeld/models/commonmodel.h": cppclass ModelFrame: - int MODEL_FRAME_SIZE + int buf_size ModelFrame(cl_device_id, cl_context) unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*) diff --git a/selfdrive/modeld/models/commonmodel_pyx.pyx b/selfdrive/modeld/models/commonmodel_pyx.pyx index d2540e960e620c..99f9c5dc173991 100644 --- a/selfdrive/modeld/models/commonmodel_pyx.pyx +++ b/selfdrive/modeld/models/commonmodel_pyx.pyx @@ -42,4 +42,4 @@ cdef class ModelFrame: data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, output.mem) if not data: return None - return np.asarray( data) + return np.asarray( data)