From 876f1921124059aa0395ed5131d7943ca10364f8 Mon Sep 17 00:00:00 2001 From: ZwX1616 Date: Thu, 26 Sep 2024 16:40:44 -0700 Subject: [PATCH] exec DM model with gpu (#33609) * half old-commit-hash: 9f72eca003d4637ca7fe847ebaf925c694fc2e84 * optimed old-commit-hash: 6e36e2a12e09275ec21d1590012a92b05ca52ff5 * thneed old-commit-hash: 419a06c09c0c767d828bcd1e118bc575898c343a * exec old-commit-hash: 0059c27ec11b076a37f65d604ed135ea6541b1a6 * runner old-commit-hash: 34232ada94450ce541eaef546197fa219810a891 * runs but old-commit-hash: 3db37c00b6a64908293b4de8d8b56e80308cd8f2 * it is 01 old-commit-hash: a160d81eb1a7e77abbef959b44f602610f68f665 * np old-commit-hash: c1caff6ba648cc2c0094c71b2ea074f01c3c2dc8 * module url old-commit-hash: 6f4902c4d384263a53e2c1d14d93b5ff864b6a5f * new old-commit-hash: 779ae79b1bc3df6374fb6663ac8592e107a6e504 * ds fast * is this work * corcention * real timing * no reg * interim gather * 0e4a9c7b * fa69be01, and halve * list * cleanup * slighly faster * setprotlt * expected * replay ref * more powar * reluctantly * bump tg * 8 * less * less * bump tg * better than exp * closer * cc * see diff * commits * was right * to 32 cast * remove dlc file * support both * dspExecutionTime -> gpuExecutionTime * ignore * time ref * ref commit * last --------- Co-authored-by: Comma Device --- .gitattributes | 1 - cereal/log.capnp | 3 +- selfdrive/modeld/SConscript | 4 ++ selfdrive/modeld/dmonitoringmodeld | 10 +++++ selfdrive/modeld/dmonitoringmodeld.py | 37 +++++++++++-------- .../modeld/models/dmonitoring_model.current | 4 +- .../modeld/models/dmonitoring_model.onnx | 4 +- .../modeld/models/dmonitoring_model_q.dlc | 3 -- selfdrive/modeld/runners/onnxmodel.py | 3 +- selfdrive/monitoring/helpers.py | 4 +- selfdrive/test/process_replay/model_replay.py | 2 +- .../process_replay/model_replay_ref_commit | 2 +- .../test/process_replay/process_replay.py | 2 +- selfdrive/test/test_onroad.py | 3 +- system/hardware/tici/tests/test_power_draw.py | 2 +- system/manager/process_config.py | 2 +- tinygrad_repo | 2 +- 17 files changed, 53 insertions(+), 35 deletions(-) create mode 100755 selfdrive/modeld/dmonitoringmodeld delete mode 100644 selfdrive/modeld/models/dmonitoring_model_q.dlc diff --git a/.gitattributes b/.gitattributes index 8781a7371f80b1..eda2505d0e3ac8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,7 +2,6 @@ # to move existing files into LFS: # git add --renormalize . -*.dlc filter=lfs diff=lfs merge=lfs -text *.onnx filter=lfs diff=lfs merge=lfs -text *.svg filter=lfs diff=lfs merge=lfs -text *.png filter=lfs diff=lfs merge=lfs -text diff --git a/cereal/log.capnp b/cereal/log.capnp index c955cbf154ab09..3d5f7d9d7f5b96 100644 --- a/cereal/log.capnp +++ b/cereal/log.capnp @@ -2012,7 +2012,8 @@ struct Joystick { struct DriverStateV2 { frameId @0 :UInt32; modelExecutionTime @1 :Float32; - dspExecutionTime @2 :Float32; + dspExecutionTimeDEPRECATED @2 :Float32; + gpuExecutionTime @8 :Float32; rawPredictions @3 :Data; poorVisionProb @4 :Float32; diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript index d572915c721a48..d4729984163620 100644 --- a/selfdrive/modeld/SConscript +++ b/selfdrive/modeld/SConscript @@ -69,6 +69,10 @@ if arch == "larch64" or GetOption('pc_thneed'): lenv.Command(fn + ".thneed", [fn + ".onnx"] + tinygrad_files, cmd) + fn_dm = File("models/dmonitoring_model").abspath + cmd = f"cd {Dir('#').abspath}/tinygrad_repo && " + ' '.join(tinygrad_opts) + f" python3 openpilot/compile2.py {fn_dm}.onnx {fn_dm}.thneed" + lenv.Command(fn_dm + ".thneed", [fn_dm + ".onnx"] + tinygrad_files, cmd) + thneed_lib = env.SharedLibrary('thneed', thneed_src, LIBS=[gpucommon, common, 'OpenCL', 'dl']) thneedmodel_lib = env.Library('thneedmodel', ['runners/thneedmodel.cc']) lenvCython.Program('runners/thneedmodel_pyx.so', 'runners/thneedmodel_pyx.pyx', LIBS=envCython["LIBS"]+[thneedmodel_lib, thneed_lib, gpucommon, common, 'dl', 'OpenCL']) diff --git a/selfdrive/modeld/dmonitoringmodeld b/selfdrive/modeld/dmonitoringmodeld new file mode 100755 index 00000000000000..80157e17519600 --- /dev/null +++ b/selfdrive/modeld/dmonitoringmodeld @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)" +cd "$DIR/../../" + +if [ -f "$DIR/libthneed.so" ]; then + export LD_PRELOAD="$DIR/libthneed.so" +fi + +exec "$DIR/dmonitoringmodeld.py" "$@" diff --git a/selfdrive/modeld/dmonitoringmodeld.py b/selfdrive/modeld/dmonitoringmodeld.py index 0187184a7acf44..3030b31a0cf4bb 100755 --- a/selfdrive/modeld/dmonitoringmodeld.py +++ b/selfdrive/modeld/dmonitoringmodeld.py @@ -6,6 +6,7 @@ import ctypes import numpy as np from pathlib import Path +from setproctitle import setproctitle from cereal import messaging from cereal.messaging import PubMaster, SubMaster @@ -14,16 +15,19 @@ from openpilot.common.params import Params from openpilot.common.realtime import set_realtime_priority from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime +from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid CALIB_LEN = 3 -REG_SCALE = 0.25 MODEL_WIDTH = 1440 MODEL_HEIGHT = 960 -OUTPUT_SIZE = 84 +FEATURE_LEN = 512 +OUTPUT_SIZE = 84 + FEATURE_LEN + +PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld" SEND_RAW_PRED = os.getenv('SEND_RAW_PRED') MODEL_PATHS = { - ModelRunner.SNPE: Path(__file__).parent / 'models/dmonitoring_model_q.dlc', + ModelRunner.THNEED: Path(__file__).parent / 'models/dmonitoring_model.thneed', ModelRunner.ONNX: Path(__file__).parent / 'models/dmonitoring_model.onnx'} class DriverStateResult(ctypes.Structure): @@ -49,21 +53,22 @@ class DMonitoringModelResult(ctypes.Structure): ("driver_state_lhd", DriverStateResult), ("driver_state_rhd", DriverStateResult), ("poor_vision_prob", ctypes.c_float), - ("wheel_on_right_prob", ctypes.c_float)] + ("wheel_on_right_prob", ctypes.c_float), + ("features", ctypes.c_float*FEATURE_LEN)] class ModelState: inputs: dict[str, np.ndarray] output: np.ndarray model: ModelRunner - def __init__(self): + def __init__(self, cl_ctx): assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float) self.output = np.zeros(OUTPUT_SIZE, dtype=np.float32) self.inputs = { 'input_img': np.zeros(MODEL_HEIGHT * MODEL_WIDTH, dtype=np.uint8), 'calib': np.zeros(CALIB_LEN, dtype=np.float32)} - self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.DSP, True, None) + self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.GPU, False, cl_ctx) self.model.addInput("input_img", None) self.model.addInput("calib", self.inputs['calib']) @@ -76,17 +81,17 @@ def run(self, buf:VisionBuf, calib:np.ndarray) -> tuple[np.ndarray, float]: input_data = self.inputs['input_img'].reshape(MODEL_HEIGHT, MODEL_WIDTH) input_data[:] = buf_data[v_offset:v_offset+MODEL_HEIGHT, h_offset:h_offset+MODEL_WIDTH] - t1 = time.perf_counter() self.model.setInputBuffer("input_img", self.inputs['input_img'].view(np.float32)) + t1 = time.perf_counter() self.model.execute() t2 = time.perf_counter() return self.output, t2 - t1 def fill_driver_state(msg, ds_result: DriverStateResult): - msg.faceOrientation = [x * REG_SCALE for x in ds_result.face_orientation] + msg.faceOrientation = list(ds_result.face_orientation) msg.faceOrientationStd = [math.exp(x) for x in ds_result.face_orientation_std] - msg.facePosition = [x * REG_SCALE for x in ds_result.face_position[:2]] + msg.facePosition = list(ds_result.face_position[:2]) msg.facePositionStd = [math.exp(x) for x in ds_result.face_position_std[:2]] msg.faceProb = float(sigmoid(ds_result.face_prob)) msg.leftEyeProb = float(sigmoid(ds_result.left_eye_prob)) @@ -98,13 +103,13 @@ def fill_driver_state(msg, ds_result: DriverStateResult): msg.readyProb = [float(sigmoid(x)) for x in ds_result.ready_prob] msg.notReadyProb = [float(sigmoid(x)) for x in ds_result.not_ready_prob] -def get_driverstate_packet(model_output: np.ndarray, frame_id: int, location_ts: int, execution_time: float, dsp_execution_time: float): +def get_driverstate_packet(model_output: np.ndarray, frame_id: int, location_ts: int, execution_time: float, gpu_execution_time: float): model_result = ctypes.cast(model_output.ctypes.data, ctypes.POINTER(DMonitoringModelResult)).contents msg = messaging.new_message('driverStateV2', valid=True) ds = msg.driverStateV2 ds.frameId = frame_id ds.modelExecutionTime = execution_time - ds.dspExecutionTime = dsp_execution_time + ds.gpuExecutionTime = gpu_execution_time ds.poorVisionProb = float(sigmoid(model_result.poor_vision_prob)) ds.wheelOnRightProb = float(sigmoid(model_result.wheel_on_right_prob)) ds.rawPredictions = model_output.tobytes() if SEND_RAW_PRED else b'' @@ -115,14 +120,16 @@ def get_driverstate_packet(model_output: np.ndarray, frame_id: int, location_ts: def main(): gc.disable() + setproctitle(PROCESS_NAME) set_realtime_priority(1) - model = ModelState() + cl_context = CLContext() + model = ModelState(cl_context) cloudlog.warning("models loaded, dmonitoringmodeld starting") Params().put_bool("DmModelInitialized", True) cloudlog.warning("connecting to driver stream") - vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True) + vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True, cl_context) while not vipc_client.connect(False): time.sleep(0.1) assert vipc_client.is_connected() @@ -144,10 +151,10 @@ def main(): calib[:] = np.array(sm["liveCalibration"].rpyCalib) t1 = time.perf_counter() - model_output, dsp_execution_time = model.run(buf, calib) + model_output, gpu_execution_time = model.run(buf, calib) t2 = time.perf_counter() - pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, dsp_execution_time)) + pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time)) # print("dmonitoring process: %.2fms, from last %.2fms\n" % (t2 - t1, t1 - last)) # last = t1 diff --git a/selfdrive/modeld/models/dmonitoring_model.current b/selfdrive/modeld/models/dmonitoring_model.current index f935ab06b34e27..121871ef2b690b 100644 --- a/selfdrive/modeld/models/dmonitoring_model.current +++ b/selfdrive/modeld/models/dmonitoring_model.current @@ -1,2 +1,2 @@ -5ec97a39-0095-4cea-adfa-6d72b1966cc1 -26cac7a9757a27c783a365403040a1bd27ccdaea \ No newline at end of file +fa69be01-b430-4504-9d72-7dcb058eb6dd +d9fb22d1c4fa3ca3d201dbc8edf1d0f0918e53e6 diff --git a/selfdrive/modeld/models/dmonitoring_model.onnx b/selfdrive/modeld/models/dmonitoring_model.onnx index dd3a6aba2e756e..dcc727510e3f08 100644 --- a/selfdrive/modeld/models/dmonitoring_model.onnx +++ b/selfdrive/modeld/models/dmonitoring_model.onnx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3dd3982940d823c4fbb0429b733a0b78b0688d7d67aa76ff7b754a3e2f3d8683 -size 16132780 +oid sha256:50efe6451a3fb3fa04b6bb0e846544533329bd46ecefe9e657e91214dee2aaeb +size 7196502 diff --git a/selfdrive/modeld/models/dmonitoring_model_q.dlc b/selfdrive/modeld/models/dmonitoring_model_q.dlc deleted file mode 100644 index fc285954d4aa61..00000000000000 --- a/selfdrive/modeld/models/dmonitoring_model_q.dlc +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c26f13816b143f5bb29ac2980f8557bd5687a75729e4d895313fb9a5a1f0f46 -size 4488449 diff --git a/selfdrive/modeld/runners/onnxmodel.py b/selfdrive/modeld/runners/onnxmodel.py index a570e34305050e..2b174227406521 100644 --- a/selfdrive/modeld/runners/onnxmodel.py +++ b/selfdrive/modeld/runners/onnxmodel.py @@ -67,7 +67,6 @@ class ONNXModel(RunModel): def __init__(self, path, output, runtime, use_tf8, cl_context): self.inputs = {} self.output = output - self.use_tf8 = use_tf8 self.session = create_ort_session(path, fp16_to_fp32=True) self.input_names = [x.name for x in self.session.get_inputs()] @@ -91,7 +90,7 @@ def getCLBuffer(self, name): return None def execute(self): - inputs = {k: (v.view(np.uint8) / 255. if self.use_tf8 and k == 'input_img' else v) for k,v in self.inputs.items()} + inputs = {k: v.view(self.input_dtypes[k]) for k,v in self.inputs.items()} inputs = {k: v.reshape(self.input_shapes[k]).astype(self.input_dtypes[k]) for k,v in inputs.items()} outputs = self.session.run(None, inputs) assert len(outputs) == 1, "Only single model outputs are supported" diff --git a/selfdrive/monitoring/helpers.py b/selfdrive/monitoring/helpers.py index 54c81daeab643f..72df724532bd71 100644 --- a/selfdrive/monitoring/helpers.py +++ b/selfdrive/monitoring/helpers.py @@ -33,8 +33,8 @@ def __init__(self): self._SG_THRESHOLD = 0.9 self._BLINK_THRESHOLD = 0.865 - self._EE_THRESH11 = 0.25 - self._EE_THRESH12 = 7.5 + self._EE_THRESH11 = 0.4 + self._EE_THRESH12 = 15.0 self._EE_MAX_OFFSET1 = 0.06 self._EE_MIN_OFFSET1 = 0.025 self._EE_THRESH21 = 0.01 diff --git a/selfdrive/test/process_replay/model_replay.py b/selfdrive/test/process_replay/model_replay.py index 269cf464a3721a..ead3a3b4b593e8 100755 --- a/selfdrive/test/process_replay/model_replay.py +++ b/selfdrive/test/process_replay/model_replay.py @@ -109,7 +109,7 @@ def model_replay(lr, frs): 'modelV2.frameDropPerc', 'modelV2.modelExecutionTime', 'driverStateV2.modelExecutionTime', - 'driverStateV2.dspExecutionTime' + 'driverStateV2.gpuExecutionTime' ] if PC: # TODO We ignore whole bunch so we can compare important stuff diff --git a/selfdrive/test/process_replay/model_replay_ref_commit b/selfdrive/test/process_replay/model_replay_ref_commit index 7d588b26fa7119..3e4f435964e27b 100644 --- a/selfdrive/test/process_replay/model_replay_ref_commit +++ b/selfdrive/test/process_replay/model_replay_ref_commit @@ -1 +1 @@ -666448fce191e196aac68d06e29a0745e6620db9 +7cd64f431b814adfa11118643efe3822c496922b diff --git a/selfdrive/test/process_replay/process_replay.py b/selfdrive/test/process_replay/process_replay.py index 59f6c6cc537f0c..af33b0458b00fa 100755 --- a/selfdrive/test/process_replay/process_replay.py +++ b/selfdrive/test/process_replay/process_replay.py @@ -585,7 +585,7 @@ def selfdrived_config_callback(params, cfg, lr): proc_name="dmonitoringmodeld", pubs=["liveCalibration", "driverCameraState"], subs=["driverStateV2"], - ignore=["logMonoTime", "driverStateV2.modelExecutionTime", "driverStateV2.dspExecutionTime"], + ignore=["logMonoTime", "driverStateV2.modelExecutionTime", "driverStateV2.gpuExecutionTime"], should_recv_callback=dmonitoringmodeld_rcv_callback, tolerance=NUMPY_TOLERANCE, processing_time=0.020, diff --git a/selfdrive/test/test_onroad.py b/selfdrive/test/test_onroad.py index 5bc6bff1c2a65b..964848e3d31a3b 100644 --- a/selfdrive/test/test_onroad.py +++ b/selfdrive/test/test_onroad.py @@ -32,6 +32,7 @@ * total CPU usage of openpilot (sum(PROCS.values()) should not exceed MAX_TOTAL_CPU """ + MAX_TOTAL_CPU = 265. # total for all 8 cores PROCS = { # Baseline CPU usage by process @@ -312,7 +313,7 @@ def test_memory_usage(self): assert max(mems) - min(mems) <= 3.0 def test_gpu_usage(self): - assert self.gpu_procs == {"weston", "ui", "camerad", "selfdrive.modeld.modeld"} + assert self.gpu_procs == {"weston", "ui", "camerad", "selfdrive.modeld.modeld", "selfdrive.modeld.dmonitoringmodeld"} def test_camera_processing_time(self): result = "\n" diff --git a/system/hardware/tici/tests/test_power_draw.py b/system/hardware/tici/tests/test_power_draw.py index eb3a515b76eeaa..3b9d65b85a218d 100644 --- a/system/hardware/tici/tests/test_power_draw.py +++ b/system/hardware/tici/tests/test_power_draw.py @@ -34,7 +34,7 @@ def name(self): PROCS = [ Proc(['camerad'], 2.1, msgs=['roadCameraState', 'wideRoadCameraState', 'driverCameraState']), Proc(['modeld'], 1.12, atol=0.2, msgs=['modelV2']), - Proc(['dmonitoringmodeld'], 0.4, msgs=['driverStateV2']), + Proc(['dmonitoringmodeld'], 0.5, msgs=['driverStateV2']), Proc(['encoderd'], 0.23, msgs=[]), ] diff --git a/system/manager/process_config.py b/system/manager/process_config.py index bdb549fa4136e9..eca5184c9243fe 100644 --- a/system/manager/process_config.py +++ b/system/manager/process_config.py @@ -70,7 +70,7 @@ def and_(*fns): PythonProcess("micd", "system.micd", iscar), PythonProcess("timed", "system.timed", always_run, enabled=not PC), - PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", driverview, enabled=(not PC or WEBCAM)), + NativeProcess("dmonitoringmodeld", "selfdrive/modeld", ["./dmonitoringmodeld"], driverview, enabled=(not PC or WEBCAM)), NativeProcess("encoderd", "system/loggerd", ["./encoderd"], only_onroad), NativeProcess("stream_encoderd", "system/loggerd", ["./encoderd", "--stream"], notcar), NativeProcess("loggerd", "system/loggerd", ["./loggerd"], logging), diff --git a/tinygrad_repo b/tinygrad_repo index f51aa0fc7cdbac..3e15fa0daefae7 160000 --- a/tinygrad_repo +++ b/tinygrad_repo @@ -1 +1 @@ -Subproject commit f51aa0fc7cdbac710e640172db280cfb747d2718 +Subproject commit 3e15fa0daefae75e2ddef98f82be5b5d37820631