Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make img pipeline all uint8 #33642

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion selfdrive/modeld/modeld.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@

METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'

MODEL_WIDTH = 512
MODEL_HEIGHT = 256
MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT * 3 // 2

class FrameMeta:
frame_id: int = 0
timestamp_sof: int = 0
Expand Down Expand Up @@ -63,6 +67,9 @@ def __init__(self, context: CLContext):
'features_buffer': np.zeros(ModelConstants.HISTORY_BUFFER_LEN * ModelConstants.FEATURE_LEN, dtype=np.float32),
}

self.input_imgs = np.zeros(MODEL_FRAME_SIZE*2, dtype=np.uint8)
self.big_input_imgs = np.zeros(MODEL_FRAME_SIZE*2, dtype=np.uint8)

with open(METADATA_PATH, 'rb') as f:
model_metadata = pickle.load(f)

Expand Down Expand Up @@ -99,6 +106,7 @@ def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_
if wbuf is not None:
self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs")))


if prepare_only:
return None

Expand Down Expand Up @@ -174,7 +182,6 @@ def main(demo=False):
CP = convert_to_capnp(get_demo_car_params())
else:
CP = messaging.log_from_bytes(params.get("CarParams", block=True), car.CarParams)

cloudlog.info("modeld got CarParams: %s", CP.carName)

# TODO this needs more thought, use .2s extra for now to estimate other delays
Expand Down
14 changes: 5 additions & 9 deletions selfdrive/modeld/models/commonmodel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,24 @@
#include "common/clutil.h"

ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
input_frames = std::make_unique<float[]>(buf_size);
input_frames = std::make_unique<uint8_t[]>(buf_size);

q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(float), NULL, &err));
net_input_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_FRAME_SIZE * sizeof(uint8_t), NULL, &err));

transform_init(&transform, context, device_id);
loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
}

float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
transform_queue(&this->transform, q,
yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);

uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
if (output == NULL) {
loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);

std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(float) * MODEL_FRAME_SIZE);
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(float), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(uint8_t) * MODEL_FRAME_SIZE);
CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
clFinish(q);
return &input_frames[0];
} else {
Expand Down
4 changes: 2 additions & 2 deletions selfdrive/modeld/models/commonmodel.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class ModelFrame {
public:
ModelFrame(cl_device_id device_id, cl_context context);
~ModelFrame();
float* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform, cl_mem *output);
uint8_t* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform, cl_mem *output);

const int MODEL_WIDTH = 512;
const int MODEL_HEIGHT = 256;
Expand All @@ -32,5 +32,5 @@ class ModelFrame {
LoadYUVState loadyuv;
cl_command_queue q;
cl_mem y_cl, u_cl, v_cl, net_input_cl;
std::unique_ptr<float[]> input_frames;
std::unique_ptr<uint8_t[]> input_frames;
};
2 changes: 1 addition & 1 deletion selfdrive/modeld/models/commonmodel.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ cdef extern from "selfdrive/modeld/models/commonmodel.h":
cppclass ModelFrame:
int buf_size
ModelFrame(cl_device_id, cl_context)
float * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
4 changes: 2 additions & 2 deletions selfdrive/modeld/models/commonmodel_pyx.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ cdef class ModelFrame:
def prepare(self, VisionBuf buf, float[:] projection, CLMem output):
cdef mat3 cprojection
memcpy(cprojection.v, &projection[0], 9*sizeof(float))
cdef float * data
cdef unsigned char * data
if output is None:
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, NULL)
else:
data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, output.mem)
if not data:
return None
return np.asarray(<cnp.float32_t[:self.frame.buf_size]> data)
return np.asarray(<cnp.uint8_t[:self.frame.buf_size]> data)
4 changes: 2 additions & 2 deletions selfdrive/modeld/models/supercombo.onnx
Git LFS file not shown
18 changes: 8 additions & 10 deletions selfdrive/modeld/transforms/loadyuv.cl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#define UV_SIZE ((TRANSFORMED_WIDTH/2)*(TRANSFORMED_HEIGHT/2))

__kernel void loadys(__global uchar8 const * const Y,
__global float * out,
__global uchar * out,
int out_offset)
{
const int gid = get_global_id(0);
Expand All @@ -10,13 +10,12 @@ __kernel void loadys(__global uchar8 const * const Y,
const int ox = ois % TRANSFORMED_WIDTH;

const uchar8 ys = Y[gid];
const float8 ysf = convert_float8(ys);

// 02
// 13

__global float* outy0;
__global float* outy1;
__global uchar* outy0;
__global uchar* outy1;
if ((oy & 1) == 0) {
outy0 = out + out_offset; //y0
outy1 = out + out_offset + UV_SIZE*2; //y2
Expand All @@ -25,21 +24,20 @@ __kernel void loadys(__global uchar8 const * const Y,
outy1 = out + out_offset + UV_SIZE*3; //y3
}

vstore4(ysf.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
vstore4(ysf.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
vstore4(ys.s0246, 0, outy0 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
vstore4(ys.s1357, 0, outy1 + (oy/2) * (TRANSFORMED_WIDTH/2) + ox/2);
}

__kernel void loaduv(__global uchar8 const * const in,
__global float8 * out,
__global uchar8 * out,
int out_offset)
{
const int gid = get_global_id(0);
const uchar8 inv = in[gid];
const float8 outv = convert_float8(inv);
out[gid + out_offset / 8] = outv;
out[gid + out_offset / 8] = inv;
}

__kernel void copy(__global float8 * inout,
__kernel void copy(__global uchar8 * inout,
int in_offset)
{
const int gid = get_global_id(0);
Expand Down
2 changes: 1 addition & 1 deletion tinygrad_repo
Loading