Skip to content

Commit

Permalink
One less copy
Browse files (browse the repository at this point in the history)
  • Loading branch information
haraschax committed Sep 26, 2024
1 parent 27fac49 commit 27c89a0
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
3 changes: 0 additions & 3 deletions selfdrive/modeld/modeld.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,6 @@ def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_
tensor_inputs['input_imgs'] = Tensor.from_blob(rawbuf_ptr, IMG_INPUT_SHAPE, dtype=dtypes.uint8, device='QCOM')
else:
tensor_inputs['input_imgs'] = Tensor(self.frame.buffer_from_cl(input_imgs_cl)).reshape(IMG_INPUT_SHAPE)
a = tensor_inputs['input_imgs'].numpy().flatten()

print(a[:10], a[MODEL_FRAME_SIZE:MODEL_FRAME_SIZE+10])

if wbuf is not None:
big_input_imgs_cl = self.wide_frame.prepare(wbuf, transform_wide.flatten(), None)
Expand Down
16 changes: 10 additions & 6 deletions selfdrive/modeld/models/commonmodel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,23 @@ ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(uint8_t), NULL, &err));

cl_buffer_region region;
region.origin = MODEL_FRAME_SIZE * sizeof(uint8_t);
region.size = MODEL_FRAME_SIZE * sizeof(uint8_t);
latest_frame_cl = CL_CHECK_ERR(clCreateSubBuffer(input_frames_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &err));

transform_init(&transform, context, device_id);
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
}

cl_mem* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
CL_CHECK(clEnqueueCopyBuffer(q, latest_frame_cl, input_frames_cl, 0, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), 0, nullptr, nullptr));

transform_queue(&this->transform, q,
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);

loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
CL_CHECK(clEnqueueCopyBuffer(q, input_frames_cl, input_frames_cl, MODEL_FRAME_SIZE * sizeof(uint8_t), 0, MODEL_FRAME_SIZE * sizeof(uint8_t), 0, nullptr, nullptr));
CL_CHECK(clEnqueueCopyBuffer(q, net_input_cl, input_frames_cl, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), MODEL_FRAME_SIZE * sizeof(uint8_t), 0, nullptr, nullptr));
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, latest_frame_cl);

// NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
clFinish(q);
Expand All @@ -43,7 +46,8 @@ uint8_t* ModelFrame::buffer_from_cl(cl_mem *in_frames) {
ModelFrame::~ModelFrame() {
transform_destroy(&transform);
loadyuv_destroy(&loadyuv);
CL_CHECK(clReleaseMemObject(net_input_cl));
CL_CHECK(clReleaseMemObject(latest_frame_cl));
CL_CHECK(clReleaseMemObject(input_frames_cl));
CL_CHECK(clReleaseMemObject(v_cl));
CL_CHECK(clReleaseMemObject(u_cl));
CL_CHECK(clReleaseMemObject(y_cl));
Expand Down
2 changes: 1 addition & 1 deletion selfdrive/modeld/models/commonmodel.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@ class ModelFrame {
Transform transform;
LoadYUVState loadyuv;
cl_command_queue q;
cl_mem y_cl, u_cl, v_cl, net_input_cl, input_frames_cl;
cl_mem y_cl, u_cl, v_cl, latest_frame_cl, input_frames_cl;
std::unique_ptr<uint8_t[]> input_frames;
};

0 comments on commit 27c89a0

Please sign in to comment.