Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

exec DM model with gpu #33609

Merged
merged 49 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
d2486a7
half
Aug 28, 2024
8bd3797
optimed
Aug 28, 2024
ec64a4a
thneed
Aug 28, 2024
17b9ead
exec
Aug 28, 2024
04726b6
runner
Aug 28, 2024
73beae8
runs but
Aug 28, 2024
644cfee
it is 01
Aug 28, 2024
acffd13
np
Aug 28, 2024
3228292
module url
ZwX1616 Aug 28, 2024
a2bdc34
new
ZwX1616 Aug 28, 2024
235a003
ds fast
Sep 5, 2024
1d1b55d
is this work
Sep 5, 2024
64286cf
corcention
Sep 5, 2024
cb41257
real timing
Sep 6, 2024
1c1ff47
no reg
Sep 6, 2024
8ecd363
interim gather
ZwX1616 Sep 6, 2024
36ef02c
0e4a9c7b
ZwX1616 Sep 9, 2024
b0ee341
Merge branch 'master' into dmon_thneed
ZwX1616 Sep 13, 2024
8168989
fa69be01, and halve
ZwX1616 Sep 13, 2024
5f3ee4c
list
ZwX1616 Sep 13, 2024
1af5fc4
cleanup
ZwX1616 Sep 13, 2024
de05388
slighly faster
Sep 14, 2024
a16ae3f
setprotlt
ZwX1616 Sep 14, 2024
0aea22a
expected
ZwX1616 Sep 14, 2024
df53f7a
replay ref
ZwX1616 Sep 14, 2024
65fa5a5
more powar
ZwX1616 Sep 17, 2024
d339c6d
reluctantly
ZwX1616 Sep 17, 2024
8bbad95
Merge branch 'master' into dmon_thneed
ZwX1616 Sep 18, 2024
f072713
bump tg
ZwX1616 Sep 18, 2024
cd271d8
8
Sep 18, 2024
4e46460
less
ZwX1616 Sep 18, 2024
d0b29fb
less
ZwX1616 Sep 18, 2024
0a77089
bump tg
ZwX1616 Sep 18, 2024
67302e2
better than exp
ZwX1616 Sep 18, 2024
ac28a8d
closer
ZwX1616 Sep 18, 2024
ae3761e
cc
ZwX1616 Sep 19, 2024
91d1089
see diff
ZwX1616 Sep 19, 2024
122f9b0
commits
ZwX1616 Sep 19, 2024
9bc5614
was right
ZwX1616 Sep 19, 2024
01a5ec3
to 32 cast
ZwX1616 Sep 19, 2024
0712541
remove dlc file
ZwX1616 Sep 19, 2024
5a48eb0
support both
ZwX1616 Sep 19, 2024
202790f
dspExecutionTime -> gpuExecutionTime
ZwX1616 Sep 20, 2024
447f0ed
ignore
ZwX1616 Sep 20, 2024
b4b98b2
time ref
ZwX1616 Sep 20, 2024
2f230b2
gitMerge branch 'master' into dmon_thneed
ZwX1616 Sep 25, 2024
52341b0
ref commit
ZwX1616 Sep 25, 2024
7cd64f4
Merge branch 'master' into dmon_thneed
ZwX1616 Sep 26, 2024
ea8d18e
last
ZwX1616 Sep 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

# to move existing files into LFS:
# git add --renormalize .
*.dlc filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.svg filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
Expand Down
3 changes: 2 additions & 1 deletion cereal/log.capnp
Original file line number Diff line number Diff line change
Expand Up @@ -2012,7 +2012,8 @@ struct Joystick {
struct DriverStateV2 {
frameId @0 :UInt32;
modelExecutionTime @1 :Float32;
dspExecutionTime @2 :Float32;
dspExecutionTimeDEPRECATED @2 :Float32;
gpuExecutionTime @8 :Float32;
rawPredictions @3 :Data;

poorVisionProb @4 :Float32;
Expand Down
4 changes: 4 additions & 0 deletions selfdrive/modeld/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ if arch == "larch64" or GetOption('pc_thneed'):

lenv.Command(fn + ".thneed", [fn + ".onnx"] + tinygrad_files, cmd)

fn_dm = File("models/dmonitoring_model").abspath
cmd = f"cd {Dir('#').abspath}/tinygrad_repo && " + ' '.join(tinygrad_opts) + f" python3 openpilot/compile2.py {fn_dm}.onnx {fn_dm}.thneed"
lenv.Command(fn_dm + ".thneed", [fn_dm + ".onnx"] + tinygrad_files, cmd)

thneed_lib = env.SharedLibrary('thneed', thneed_src, LIBS=[gpucommon, common, 'OpenCL', 'dl'])
thneedmodel_lib = env.Library('thneedmodel', ['runners/thneedmodel.cc'])
lenvCython.Program('runners/thneedmodel_pyx.so', 'runners/thneedmodel_pyx.pyx', LIBS=envCython["LIBS"]+[thneedmodel_lib, thneed_lib, gpucommon, common, 'dl', 'OpenCL'])
10 changes: 10 additions & 0 deletions selfdrive/modeld/dmonitoringmodeld
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Launcher for dmonitoringmodeld.py: changes to the directory two levels above
# this script, optionally preloads libthneed.so, then replaces this shell with
# the Python entry point, forwarding all command-line arguments.

# Absolute path of the directory containing this script.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"

# Abort instead of exec'ing the daemon from an unexpected working directory
# if the cd fails.
cd "$DIR/../../" || exit 1

# Preload libthneed.so only when it exists next to this script.
if [ -f "$DIR/libthneed.so" ]; then
  export LD_PRELOAD="$DIR/libthneed.so"
fi

# exec so the Python process replaces this shell rather than running as a child.
exec "$DIR/dmonitoringmodeld.py" "$@"
37 changes: 22 additions & 15 deletions selfdrive/modeld/dmonitoringmodeld.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import ctypes
import numpy as np
from pathlib import Path
from setproctitle import setproctitle

from cereal import messaging
from cereal.messaging import PubMaster, SubMaster
Expand All @@ -14,16 +15,19 @@
from openpilot.common.params import Params
from openpilot.common.realtime import set_realtime_priority
from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime
from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext
from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid

CALIB_LEN = 3
REG_SCALE = 0.25
MODEL_WIDTH = 1440
MODEL_HEIGHT = 960
OUTPUT_SIZE = 84
FEATURE_LEN = 512
OUTPUT_SIZE = 84 + FEATURE_LEN

PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld"
SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
MODEL_PATHS = {
ModelRunner.SNPE: Path(__file__).parent / 'models/dmonitoring_model_q.dlc',
ModelRunner.THNEED: Path(__file__).parent / 'models/dmonitoring_model.thneed',
ModelRunner.ONNX: Path(__file__).parent / 'models/dmonitoring_model.onnx'}

class DriverStateResult(ctypes.Structure):
Expand All @@ -49,21 +53,22 @@ class DMonitoringModelResult(ctypes.Structure):
("driver_state_lhd", DriverStateResult),
("driver_state_rhd", DriverStateResult),
("poor_vision_prob", ctypes.c_float),
("wheel_on_right_prob", ctypes.c_float)]
("wheel_on_right_prob", ctypes.c_float),
("features", ctypes.c_float*FEATURE_LEN)]

class ModelState:
inputs: dict[str, np.ndarray]
output: np.ndarray
model: ModelRunner

def __init__(self):
def __init__(self, cl_ctx):
assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float)
self.output = np.zeros(OUTPUT_SIZE, dtype=np.float32)
self.inputs = {
'input_img': np.zeros(MODEL_HEIGHT * MODEL_WIDTH, dtype=np.uint8),
'calib': np.zeros(CALIB_LEN, dtype=np.float32)}

self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.DSP, True, None)
self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.GPU, False, cl_ctx)
self.model.addInput("input_img", None)
self.model.addInput("calib", self.inputs['calib'])

Expand All @@ -76,17 +81,17 @@ def run(self, buf:VisionBuf, calib:np.ndarray) -> tuple[np.ndarray, float]:
input_data = self.inputs['input_img'].reshape(MODEL_HEIGHT, MODEL_WIDTH)
input_data[:] = buf_data[v_offset:v_offset+MODEL_HEIGHT, h_offset:h_offset+MODEL_WIDTH]

t1 = time.perf_counter()
self.model.setInputBuffer("input_img", self.inputs['input_img'].view(np.float32))
t1 = time.perf_counter()
self.model.execute()
t2 = time.perf_counter()
return self.output, t2 - t1


def fill_driver_state(msg, ds_result: DriverStateResult):
msg.faceOrientation = [x * REG_SCALE for x in ds_result.face_orientation]
msg.faceOrientation = list(ds_result.face_orientation)
msg.faceOrientationStd = [math.exp(x) for x in ds_result.face_orientation_std]
msg.facePosition = [x * REG_SCALE for x in ds_result.face_position[:2]]
msg.facePosition = list(ds_result.face_position[:2])
msg.facePositionStd = [math.exp(x) for x in ds_result.face_position_std[:2]]
msg.faceProb = float(sigmoid(ds_result.face_prob))
msg.leftEyeProb = float(sigmoid(ds_result.left_eye_prob))
Expand All @@ -98,13 +103,13 @@ def fill_driver_state(msg, ds_result: DriverStateResult):
msg.readyProb = [float(sigmoid(x)) for x in ds_result.ready_prob]
msg.notReadyProb = [float(sigmoid(x)) for x in ds_result.not_ready_prob]

def get_driverstate_packet(model_output: np.ndarray, frame_id: int, location_ts: int, execution_time: float, dsp_execution_time: float):
def get_driverstate_packet(model_output: np.ndarray, frame_id: int, location_ts: int, execution_time: float, gpu_execution_time: float):
model_result = ctypes.cast(model_output.ctypes.data, ctypes.POINTER(DMonitoringModelResult)).contents
msg = messaging.new_message('driverStateV2', valid=True)
ds = msg.driverStateV2
ds.frameId = frame_id
ds.modelExecutionTime = execution_time
ds.dspExecutionTime = dsp_execution_time
ds.gpuExecutionTime = gpu_execution_time
ds.poorVisionProb = float(sigmoid(model_result.poor_vision_prob))
ds.wheelOnRightProb = float(sigmoid(model_result.wheel_on_right_prob))
ds.rawPredictions = model_output.tobytes() if SEND_RAW_PRED else b''
Expand All @@ -115,14 +120,16 @@ def get_driverstate_packet(model_output: np.ndarray, frame_id: int, location_ts:

def main():
gc.disable()
setproctitle(PROCESS_NAME)
set_realtime_priority(1)

model = ModelState()
cl_context = CLContext()
model = ModelState(cl_context)
cloudlog.warning("models loaded, dmonitoringmodeld starting")
Params().put_bool("DmModelInitialized", True)

cloudlog.warning("connecting to driver stream")
vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True)
vipc_client = VisionIpcClient("camerad", VisionStreamType.VISION_STREAM_DRIVER, True, cl_context)
while not vipc_client.connect(False):
time.sleep(0.1)
assert vipc_client.is_connected()
Expand All @@ -144,10 +151,10 @@ def main():
calib[:] = np.array(sm["liveCalibration"].rpyCalib)

t1 = time.perf_counter()
model_output, dsp_execution_time = model.run(buf, calib)
model_output, gpu_execution_time = model.run(buf, calib)
t2 = time.perf_counter()

pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, dsp_execution_time))
pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time))
# print("dmonitoring process: %.2fms, from last %.2fms\n" % (t2 - t1, t1 - last))
# last = t1

Expand Down
4 changes: 2 additions & 2 deletions selfdrive/modeld/models/dmonitoring_model.current
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
5ec97a39-0095-4cea-adfa-6d72b1966cc1
26cac7a9757a27c783a365403040a1bd27ccdaea
fa69be01-b430-4504-9d72-7dcb058eb6dd
d9fb22d1c4fa3ca3d201dbc8edf1d0f0918e53e6
4 changes: 2 additions & 2 deletions selfdrive/modeld/models/dmonitoring_model.onnx
Git LFS file not shown
3 changes: 0 additions & 3 deletions selfdrive/modeld/models/dmonitoring_model_q.dlc

This file was deleted.

3 changes: 1 addition & 2 deletions selfdrive/modeld/runners/onnxmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ class ONNXModel(RunModel):
def __init__(self, path, output, runtime, use_tf8, cl_context):
self.inputs = {}
self.output = output
self.use_tf8 = use_tf8

self.session = create_ort_session(path, fp16_to_fp32=True)
self.input_names = [x.name for x in self.session.get_inputs()]
Expand All @@ -91,7 +90,7 @@ def getCLBuffer(self, name):
return None

def execute(self):
inputs = {k: (v.view(np.uint8) / 255. if self.use_tf8 and k == 'input_img' else v) for k,v in self.inputs.items()}
inputs = {k: v.view(self.input_dtypes[k]) for k,v in self.inputs.items()}
inputs = {k: v.reshape(self.input_shapes[k]).astype(self.input_dtypes[k]) for k,v in inputs.items()}
outputs = self.session.run(None, inputs)
assert len(outputs) == 1, "Only single model outputs are supported"
Expand Down
4 changes: 2 additions & 2 deletions selfdrive/monitoring/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def __init__(self):
self._SG_THRESHOLD = 0.9
self._BLINK_THRESHOLD = 0.865

self._EE_THRESH11 = 0.25
self._EE_THRESH12 = 7.5
self._EE_THRESH11 = 0.4
self._EE_THRESH12 = 15.0
self._EE_MAX_OFFSET1 = 0.06
self._EE_MIN_OFFSET1 = 0.025
self._EE_THRESH21 = 0.01
Expand Down
2 changes: 1 addition & 1 deletion selfdrive/test/process_replay/model_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def model_replay(lr, frs):
'modelV2.frameDropPerc',
'modelV2.modelExecutionTime',
'driverStateV2.modelExecutionTime',
'driverStateV2.dspExecutionTime'
'driverStateV2.gpuExecutionTime'
]
if PC:
# TODO: we ignore a whole bunch of fields so we can compare the important stuff
Expand Down
2 changes: 1 addition & 1 deletion selfdrive/test/process_replay/model_replay_ref_commit
Original file line number Diff line number Diff line change
@@ -1 +1 @@
666448fce191e196aac68d06e29a0745e6620db9
7cd64f431b814adfa11118643efe3822c496922b
2 changes: 1 addition & 1 deletion selfdrive/test/process_replay/process_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ def selfdrived_config_callback(params, cfg, lr):
proc_name="dmonitoringmodeld",
pubs=["liveCalibration", "driverCameraState"],
subs=["driverStateV2"],
ignore=["logMonoTime", "driverStateV2.modelExecutionTime", "driverStateV2.dspExecutionTime"],
ignore=["logMonoTime", "driverStateV2.modelExecutionTime", "driverStateV2.gpuExecutionTime"],
should_recv_callback=dmonitoringmodeld_rcv_callback,
tolerance=NUMPY_TOLERANCE,
processing_time=0.020,
Expand Down
3 changes: 2 additions & 1 deletion selfdrive/test/test_onroad.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
* total CPU usage of openpilot (sum(PROCS.values()))
should not exceed MAX_TOTAL_CPU
"""

MAX_TOTAL_CPU = 265. # total for all 8 cores
PROCS = {
# Baseline CPU usage by process
Expand Down Expand Up @@ -312,7 +313,7 @@ def test_memory_usage(self):
assert max(mems) - min(mems) <= 3.0

def test_gpu_usage(self):
assert self.gpu_procs == {"weston", "ui", "camerad", "selfdrive.modeld.modeld"}
assert self.gpu_procs == {"weston", "ui", "camerad", "selfdrive.modeld.modeld", "selfdrive.modeld.dmonitoringmodeld"}

def test_camera_processing_time(self):
result = "\n"
Expand Down
2 changes: 1 addition & 1 deletion system/hardware/tici/tests/test_power_draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def name(self):
PROCS = [
Proc(['camerad'], 2.1, msgs=['roadCameraState', 'wideRoadCameraState', 'driverCameraState']),
Proc(['modeld'], 1.12, atol=0.2, msgs=['modelV2']),
Proc(['dmonitoringmodeld'], 0.4, msgs=['driverStateV2']),
Proc(['dmonitoringmodeld'], 0.5, msgs=['driverStateV2']),
Proc(['encoderd'], 0.23, msgs=[]),
]

Expand Down
2 changes: 1 addition & 1 deletion system/manager/process_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def and_(*fns):
PythonProcess("micd", "system.micd", iscar),
PythonProcess("timed", "system.timed", always_run, enabled=not PC),

PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", driverview, enabled=(not PC or WEBCAM)),
NativeProcess("dmonitoringmodeld", "selfdrive/modeld", ["./dmonitoringmodeld"], driverview, enabled=(not PC or WEBCAM)),
NativeProcess("encoderd", "system/loggerd", ["./encoderd"], only_onroad),
NativeProcess("stream_encoderd", "system/loggerd", ["./encoderd", "--stream"], notcar),
NativeProcess("loggerd", "system/loggerd", ["./loggerd"], logging),
Expand Down
2 changes: 1 addition & 1 deletion tinygrad_repo
Loading