From 2a569930496e11fe414fd6395f6cda8030e603c3 Mon Sep 17 00:00:00 2001
From: Bruce Wayne
Date: Tue, 24 Sep 2024 19:39:32 -0700
Subject: [PATCH] All in modeld

---
 selfdrive/modeld/modeld.py                | 37 +++++----
 selfdrive/modeld/models/commonmodel.cc    | 19 ++---
 selfdrive/modeld/runners/tinygradmodel.py | 94 -----------------------
 3 files changed, 24 insertions(+), 126 deletions(-)
 delete mode 100644 selfdrive/modeld/runners/tinygradmodel.py

diff --git a/selfdrive/modeld/modeld.py b/selfdrive/modeld/modeld.py
index 5585d52750b858..e0956861158549 100755
--- a/selfdrive/modeld/modeld.py
+++ b/selfdrive/modeld/modeld.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 import os
-# TODO this is hack
+## TODO this is hack
 os.environ['QCOM'] = '1'
 import time
 import pickle
@@ -25,7 +25,10 @@
 from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
 from openpilot.selfdrive.modeld.constants import ModelConstants
 from openpilot.selfdrive.modeld.models.commonmodel_pyx import ModelFrame, CLContext
-from openpilot.selfdrive.modeld.runners.tinygradmodel import TinygradModel
+
+from tinygrad.tensor import Tensor
+Tensor.manual_seed(1337)
+Tensor.no_grad = True
 
 PROCESS_NAME = "selfdrive.modeld.modeld"
 SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
@@ -50,18 +53,19 @@ class ModelState:
   inputs: dict[str, np.ndarray]
   output: np.ndarray
   prev_desire: np.ndarray  # for tracking the rising edge of the pulse
-  model: TinygradModel
 
   def __init__(self, context: CLContext):
     self.frame = ModelFrame(context)
     self.wide_frame = ModelFrame(context)
     self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
     self.inputs = {
-      'desire': np.zeros(ModelConstants.DESIRE_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
-      'traffic_convention': np.zeros(ModelConstants.TRAFFIC_CONVENTION_LEN, dtype=np.float32),
-      'lateral_control_params': np.zeros(ModelConstants.LATERAL_CONTROL_PARAMS_LEN, dtype=np.float32),
-      'prev_desired_curv': np.zeros(ModelConstants.PREV_DESIRED_CURV_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
-      'features_buffer': np.zeros(ModelConstants.HISTORY_BUFFER_LEN * ModelConstants.FEATURE_LEN, dtype=np.float32),
+      'input_imgs': np.zeros((1, 12, 128, 256), dtype=np.float16),
+      'big_input_imgs': np.zeros((1, 12, 128, 256), dtype=np.float16),
+      'desire': np.zeros((1, (ModelConstants.HISTORY_BUFFER_LEN+1), ModelConstants.DESIRE_LEN), dtype=np.float16),
+      'traffic_convention': np.zeros((1, ModelConstants.TRAFFIC_CONVENTION_LEN), dtype=np.float16),
+      'lateral_control_params': np.zeros((1, ModelConstants.LATERAL_CONTROL_PARAMS_LEN), dtype=np.float16),
+      'prev_desired_curv': np.zeros((1,(ModelConstants.HISTORY_BUFFER_LEN+1), ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float16),
+      'features_buffer': np.zeros((1, ModelConstants.HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float16),
     }
 
     with open(METADATA_PATH, 'rb') as f:
@@ -72,12 +76,8 @@ def __init__(self, context: CLContext):
     self.output = np.zeros(net_output_size, dtype=np.float32)
     self.parser = Parser()
 
-    self.model = TinygradModel(MODEL_PATH, MODEL_PKL_PATH, self.output)
-
-    self.model.addInput("input_imgs", None)
-    self.model.addInput("big_input_imgs", None)
-    for k,v in self.inputs.items():
-      self.model.addInput(k, v)
+    with open(MODEL_PKL_PATH, "rb") as f:
+      self.model_run = pickle.load(f)
 
   def slice_outputs(self, model_outputs: np.ndarray) -> dict[str, np.ndarray]:
     parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in self.output_slices.items()}
@@ -95,16 +95,15 @@ def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_
     self.inputs['traffic_convention'][:] = inputs['traffic_convention']
     self.inputs['lateral_control_params'][:] = inputs['lateral_control_params']
-
-    # if getCLBuffer is not None, frame will be None
-    self.model.setInputBuffer("input_imgs", self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs")))
+    self.inputs['input_imgs'] = self.frame.prepare(buf, transform.flatten(), None).astype(np.float16).reshape(self.inputs['input_imgs'].shape)
     if wbuf is not None:
-      self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs")))
+      self.inputs['big_input_imgs'] = self.wide_frame.prepare(wbuf, transform_wide.flatten(), None).astype(np.float16).reshape(self.inputs['input_imgs'].shape)
 
     if prepare_only:
       return None
 
-    self.model.execute()
+    self.tensor_inputs = {k: Tensor(v) for k, v in self.inputs.items()}
+    self.output = self.model_run(**self.tensor_inputs)['outputs'].numpy().flatten()
 
     outputs = self.parser.parse_outputs(self.slice_outputs(self.output))
 
     self.inputs['features_buffer'][:-ModelConstants.FEATURE_LEN] = self.inputs['features_buffer'][ModelConstants.FEATURE_LEN:]
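
The modeld.py side of the patch reduces to two moves: unpickle the jitted tinygrad callable once at startup, then wrap the float16 numpy buffers in Tensors and call it every frame. Below is a minimal standalone sketch of that path, not the exact modeld.py code; the pickle location and the openpilot imports are assumptions about the tree this patch targets.

# Sketch: load the pickled tinygrad model and run one forward pass,
# mirroring ModelState.__init__ and ModelState.run from the diff above.
# Assumptions: supercombo_tinygrad.pkl exists at the path below and its
# input names/shapes match the ModelState.inputs dict.
import pickle
from pathlib import Path

import numpy as np
from tinygrad.tensor import Tensor

from openpilot.selfdrive.modeld.constants import ModelConstants

Tensor.manual_seed(1337)
Tensor.no_grad = True

MODEL_PKL_PATH = Path('selfdrive/modeld/models/supercombo_tinygrad.pkl')  # assumed path

with open(MODEL_PKL_PATH, "rb") as f:
  model_run = pickle.load(f)  # jitted tinygrad callable

inputs = {
  'input_imgs': np.zeros((1, 12, 128, 256), dtype=np.float16),
  'big_input_imgs': np.zeros((1, 12, 128, 256), dtype=np.float16),
  'desire': np.zeros((1, ModelConstants.HISTORY_BUFFER_LEN+1, ModelConstants.DESIRE_LEN), dtype=np.float16),
  'traffic_convention': np.zeros((1, ModelConstants.TRAFFIC_CONVENTION_LEN), dtype=np.float16),
  'lateral_control_params': np.zeros((1, ModelConstants.LATERAL_CONTROL_PARAMS_LEN), dtype=np.float16),
  'prev_desired_curv': np.zeros((1, ModelConstants.HISTORY_BUFFER_LEN+1, ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float16),
  'features_buffer': np.zeros((1, ModelConstants.HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float16),
}

# Each numpy buffer becomes a Tensor; the call returns a dict whose 'outputs'
# entry is flattened back to a 1-D numpy vector for the parser.
output = model_run(**{k: Tensor(v) for k, v in inputs.items()})['outputs'].numpy().flatten()
print(output.shape)
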
diff --git a/selfdrive/modeld/models/commonmodel.cc b/selfdrive/modeld/models/commonmodel.cc
index 57c14dfa881189..5dab3f6d94f231 100644
--- a/selfdrive/modeld/models/commonmodel.cc
+++ b/selfdrive/modeld/models/commonmodel.cc
@@ -24,19 +24,12 @@ float* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int
                   yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
                   y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
 
-  if (output == NULL) {
-    loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
-
-    std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(float) * MODEL_FRAME_SIZE);
-    CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(float), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
-    clFinish(q);
-    return &input_frames[0];
-  } else {
-    loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, *output, true);
-    // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
-    clFinish(q);
-    return NULL;
-  }
+  loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, net_input_cl);
+
+  std::memmove(&input_frames[0], &input_frames[MODEL_FRAME_SIZE], sizeof(float) * MODEL_FRAME_SIZE);
+  CL_CHECK(clEnqueueReadBuffer(q, net_input_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(float), &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
+  clFinish(q);
+  return &input_frames[0];
 }
 
 ModelFrame::~ModelFrame() {
diff --git a/selfdrive/modeld/runners/tinygradmodel.py b/selfdrive/modeld/runners/tinygradmodel.py
deleted file mode 100644
index 3ecce8b1cb818d..00000000000000
--- a/selfdrive/modeld/runners/tinygradmodel.py
+++ /dev/null
@@ -1,94 +0,0 @@
-import os
-import sys
-import pickle
-from pathlib import Path
-
-OPENPILOT_ROOT = Path(__file__).parent.parent.parent.parent
-if not (OPENPILOT_ROOT / 'tinygrad_repo').exists():
-  OPENPILOT_ROOT = OPENPILOT_ROOT.parent  # This takes us from openpilot/openpilot to openpilot but it's probably wrong
-TINYGRAD_REPO_PATH = OPENPILOT_ROOT / 'tinygrad_repo'
-assert TINYGRAD_REPO_PATH.exists()
-assert (TINYGRAD_REPO_PATH / 'extra').exists()
-sys.path.append(str(TINYGRAD_REPO_PATH))
-
-if os.getenv("OPT", None) is None:
-  os.environ['OPT'] = '99'
-if os.getenv("GPU", None) is None:
-  os.environ['GPU'] = '1'
-if os.getenv("IMAGE", None) is None:
-  os.environ['IMAGE'] = '2'
-
-import onnx
-import numpy as np
-from tinygrad.tensor import Tensor
-from tinygrad.dtype import dtypes
-
-ONNX_TYPES_TO_NP_TYPES: dict[int, np.dtype] = {
-  i: onnx.helper.tensor_dtype_to_np_dtype(i)
-  for dtype, i in onnx.TensorProto.DataType.items()
-  if dtype in ['FLOAT', 'FLOAT16', 'INT64', 'INT32', 'UINT8']
-}
-
-
-class TinygradModel(object):
-  def __init__(self, onnx_path, pkl_path, output):
-    self.inputs = {}
-    self.output = output
-
-    Tensor.manual_seed(1337)
-    Tensor.no_grad = True
-
-    onnx_model = onnx.load(onnx_path)
-    with open(pkl_path, "rb") as f:
-      self.run = pickle.load(f)
-    self.input_shapes = {inp.name:tuple(x.dim_value for x in inp.type.tensor_type.shape.dim) for inp in onnx_model.graph.input}
-    self.input_dtypes = {inp.name: ONNX_TYPES_TO_NP_TYPES[inp.type.tensor_type.elem_type] for inp in onnx_model.graph.input}
-
-
-  def addInput(self, name, val):
-    assert name in self.input_shapes
-    if val is not None:
-      self.inputs[name] = Tensor(val).reshape(self.input_shapes[name])
-    else:
-      self.inputs[name] = None
-
-  def setInputBuffer(self, name, val):
-    assert name in self.inputs
-    self.inputs[name] = Tensor(val).reshape(self.input_shapes[name])
-
-  def getCLBuffer(self, name):
-    return None
-
-  def execute(self):
-    outputs = self.run(**self.inputs)
-    return outputs['outputs'].numpy()
-
-
-if __name__ == "__main__":
-  import pickle
-  from tqdm import trange
-  from openpilot.selfdrive.modeld.constants import ModelConstants
-
-  MODEL_PKL_PATH = Path(__file__).parent.parent / 'models/supercombo_tinygrad.pkl'
-
-  with open(MODEL_PKL_PATH, "rb") as f:
-    model_run = pickle.load(f)
-  inputs = {
-    'input_imgs': np.zeros((1, 12, 128, 256), dtype=np.float16),
-    'big_input_imgs': np.zeros((1, 12, 128, 256), dtype=np.float16),
-    'desire': np.zeros((1, (ModelConstants.HISTORY_BUFFER_LEN+1), ModelConstants.DESIRE_LEN), dtype=np.float16),
-    'traffic_convention': np.zeros((1, ModelConstants.TRAFFIC_CONVENTION_LEN), dtype=np.float16),
-    'lateral_control_params': np.zeros((1, ModelConstants.LATERAL_CONTROL_PARAMS_LEN), dtype=np.float16),
-    'prev_desired_curv': np.zeros((1,(ModelConstants.HISTORY_BUFFER_LEN+1), ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float16),
-    'features_buffer': np.zeros((1, ModelConstants.HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float16),
-  }
-  tensor_inputs = {k: Tensor(v) for k, v in inputs.items()}
-
-  import time
-  t0 = time.time()
-  for _ in trange(100):
-    for k, v in inputs.items():
-      tensor_inputs[k] = Tensor(v)
-    model_run(**tensor_inputs)
-  t1 = time.time()
-  print(f"100 iterations with average of {(t1-t0)*10:.2f}ms/it")
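
The quick benchmark that lived in the deleted runner's __main__ block can still be run against the pickle directly. A rough sketch follows, reusing model_run and inputs from the sketch after the modeld.py diff above; tqdm is assumed available, as it was in the deleted file.

# Timing harness mirroring the removed tinygradmodel.py __main__ block.
# Reuses model_run and inputs from the earlier sketch.
import time
from tqdm import trange

from tinygrad.tensor import Tensor

tensor_inputs = {k: Tensor(v) for k, v in inputs.items()}

t0 = time.time()
for _ in trange(100):
  # Rebuild the input Tensors each iteration, as the removed benchmark did,
  # so host-to-device transfer is included in the measured time.
  for k, v in inputs.items():
    tensor_inputs[k] = Tensor(v)
  model_run(**tensor_inputs)
t1 = time.time()
print(f"100 iterations with average of {(t1-t0)*10:.2f}ms/it")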