diff --git a/depthai_sdk/docs/source/conf.py b/depthai_sdk/docs/source/conf.py index 3e2765f71..2407ed557 100644 --- a/depthai_sdk/docs/source/conf.py +++ b/depthai_sdk/docs/source/conf.py @@ -22,7 +22,7 @@ author = 'Luxonis' # The full version, including alpha/beta/rc tags -release = '1.11.0' +release = '1.12.0' # -- General configuration --------------------------------------------------- diff --git a/depthai_sdk/examples/CameraComponent/camera_control_with_nn.py b/depthai_sdk/examples/CameraComponent/camera_control_with_nn.py new file mode 100644 index 000000000..f8b228afd --- /dev/null +++ b/depthai_sdk/examples/CameraComponent/camera_control_with_nn.py @@ -0,0 +1,10 @@ +from depthai_sdk import OakCamera + +with OakCamera() as oak: + color = oak.create_camera('color') + face_det = oak.create_nn('face-detection-retail-0004', color) + # Control the camera's exposure/focus based on the (largest) detected face + color.control_with_nn(face_det, auto_focus=True, auto_exposure=True, debug=False) + + oak.visualize(face_det, fps=True) + oak.start(blocking=True) diff --git a/depthai_sdk/examples/StereoComponent/stereo_encoded.py b/depthai_sdk/examples/StereoComponent/stereo_encoded.py index 0b5660f09..8ca1eb471 100644 --- a/depthai_sdk/examples/StereoComponent/stereo_encoded.py +++ b/depthai_sdk/examples/StereoComponent/stereo_encoded.py @@ -6,7 +6,7 @@ stereo = oak.create_stereo('800p', fps=30, encode='h264') # Set on-device output colorization, works only for encoded output - stereo.set_colormap(dai.Colormap.STEREO_JET) + stereo.set_colormap(dai.Colormap.JET) oak.visualize(stereo.out.encoded, fps=True) oak.start(blocking=True) diff --git a/depthai_sdk/examples/trigger_action/custom_trigger.py b/depthai_sdk/examples/trigger_action/custom_trigger.py index 735173805..c69abbedd 100644 --- a/depthai_sdk/examples/trigger_action/custom_trigger.py +++ b/depthai_sdk/examples/trigger_action/custom_trigger.py @@ -27,7 +27,7 @@ def my_condition(packet) -> bool: with OakCamera() as oak: color = oak.create_camera('color', fps=30) stereo = oak.create_stereo('800p') - stereo.config_stereo(align='color') + stereo.config_stereo(align=color) trigger = Trigger(input=stereo.out.depth, condition=my_condition, cooldown=30) action = RecordAction( diff --git a/depthai_sdk/requirements.txt b/depthai_sdk/requirements.txt index d1a5f96fa..162165241 100644 --- a/depthai_sdk/requirements.txt +++ b/depthai_sdk/requirements.txt @@ -4,7 +4,7 @@ opencv-contrib-python>4 blobconverter>=1.4.1 pytube>=12.1.0 --extra-index-url https://artifacts.luxonis.com/artifactory/luxonis-python-snapshot-local/ -depthai==2.21.2 +depthai==2.22.0 PyTurboJPEG==1.6.4 marshmallow==3.17.0 xmltodict diff --git a/depthai_sdk/sdk_tests/components/nn/test_nn_component.py b/depthai_sdk/sdk_tests/components/nn/test_nn_component.py index 5e95f5556..e5156d092 100644 --- a/depthai_sdk/sdk_tests/components/nn/test_nn_component.py +++ b/depthai_sdk/sdk_tests/components/nn/test_nn_component.py @@ -91,3 +91,15 @@ def callback(packet): if not oak_camera.poll(): raise RuntimeError('Polling failed') time.sleep(0.1) + + +def test_encoded_output(): + with OakCamera() as oak_camera: + camera = oak_camera.create_camera('color', '1080p', encode='h264') + + oak_camera.callback(camera.out.encoded, lambda x: print(x)) + oak_camera.start(blocking=False) + + for i in range(10): + oak_camera.poll() + time.sleep(0.1) diff --git a/depthai_sdk/sdk_tests/components/stereo/test_stereo_component.py b/depthai_sdk/sdk_tests/components/stereo/test_stereo_component.py index 2b7bf91c3..f0ef137de 100644 --- a/depthai_sdk/sdk_tests/components/stereo/test_stereo_component.py +++ b/depthai_sdk/sdk_tests/components/stereo/test_stereo_component.py @@ -1,8 +1,9 @@ import time +import depthai as dai import pytest + from depthai_sdk.oak_camera import OakCamera -import depthai as dai def test_stereo_output(): @@ -10,10 +11,11 @@ def test_stereo_output(): if dai.CameraBoardSocket.LEFT not in oak_camera.sensors: pytest.skip('Looks like camera does not have mono pair, skipping...') else: - stereo = oak_camera.create_stereo('400p') + stereo = oak_camera.create_stereo('800p', encode='h264') oak_camera.callback([stereo.out.depth, stereo.out.disparity, - stereo.out.rectified_left, stereo.out.rectified_right], callback=lambda x: None) + stereo.out.rectified_left, stereo.out.rectified_right, + stereo.out.encoded], callback=lambda x: None) oak_camera.start(blocking=False) for i in range(10): diff --git a/depthai_sdk/sdk_tests/test_examples.py b/depthai_sdk/sdk_tests/test_examples.py index 9bb6946db..636c018be 100644 --- a/depthai_sdk/sdk_tests/test_examples.py +++ b/depthai_sdk/sdk_tests/test_examples.py @@ -1,27 +1,38 @@ +import os import subprocess import sys import time from pathlib import Path import cv2 +import pytest -EXAMPLES_DIR = Path(__file__).parent.parent.parent / "examples" +EXAMPLES_DIR = Path(__file__).parents[1] / 'examples' +# Create a temporary directory for the tests +Path('/tmp/depthai_sdk_tests').mkdir(exist_ok=True) +os.chdir('/tmp/depthai_sdk_tests') -def test_examples(): - python_executable = Path(sys.executable) - for example in EXAMPLES_DIR.rglob("**/*.py"): - print(f"Running example: {example.name}") - - result = subprocess.Popen(f"{python_executable} {example}", stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env={"DISPLAY": ""}, shell=True) - - time.sleep(5) - result.kill() - time.sleep(5) - print('Stderr: ', result.stderr.read().decode()) - # if result.returncode and result.returncode != 0: - # assert False, f"{example} raised an exception: {result.stderr}" - - cv2.destroyAllWindows() +@pytest.mark.parametrize('example', list(EXAMPLES_DIR.rglob("**/*.py"))) +def test_examples(example): + print(f"Running {example}") + python_executable = Path(sys.executable) + result = subprocess.Popen(f"{python_executable} {example}", + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env={ + 'DISPLAY': '', + 'PYTHONPATH': f'{os.environ["PYTHONPATH"]}:{EXAMPLES_DIR.parent}' + }, + shell=True) + + time.sleep(5) + result.kill() + time.sleep(5) + print('Stderr: ', result.stderr.read().decode()) + + if result.returncode and result.returncode != 0: + assert False, f"{example} raised an exception: {result.stderr}" + + cv2.destroyAllWindows() diff --git a/depthai_sdk/setup.py b/depthai_sdk/setup.py index 415048309..d78ea7f06 100644 --- a/depthai_sdk/setup.py +++ b/depthai_sdk/setup.py @@ -9,7 +9,7 @@ setup( name='depthai-sdk', - version='1.11.0', + version='1.12.0', description='This package provides an abstraction of the DepthAI API library.', long_description=io.open("README.md", encoding="utf-8").read(), long_description_content_type="text/markdown", @@ -30,7 +30,8 @@ "replay": ['mcap>=0.0.10', 'mcap-ros1-support==0.0.8', 'rosbags==0.9.11'], - "record": ['av'] + "record": ['av'], + "test": ['pytest'] }, project_urls={ "Bug Tracker": "https://github.com/luxonis/depthai/issues", diff --git a/depthai_sdk/src/depthai_sdk/__init__.py b/depthai_sdk/src/depthai_sdk/__init__.py index c3fbab8ae..abfe2341c 100644 --- a/depthai_sdk/src/depthai_sdk/__init__.py +++ b/depthai_sdk/src/depthai_sdk/__init__.py @@ -1,5 +1,6 @@ from depthai_sdk.args_parser import ArgsParser from depthai_sdk.classes.enum import ResizeMode +from depthai_sdk.constants import CV2_HAS_GUI_SUPPORT from depthai_sdk.logger import set_logging_level from depthai_sdk.oak_camera import OakCamera from depthai_sdk.oak_device import OakDevice @@ -10,7 +11,7 @@ from depthai_sdk.utils import _create_config, get_config_field from depthai_sdk.visualize import * -__version__ = '1.11.0' +__version__ = '1.12.0' def __import_sentry(sentry_dsn: str) -> None: diff --git a/depthai_sdk/src/depthai_sdk/classes/output_config.py b/depthai_sdk/src/depthai_sdk/classes/output_config.py index 5551109a3..77a6144a9 100644 --- a/depthai_sdk/src/depthai_sdk/classes/output_config.py +++ b/depthai_sdk/src/depthai_sdk/classes/output_config.py @@ -16,10 +16,21 @@ from depthai_sdk.trigger_action.triggers.abstract_trigger import Trigger from depthai_sdk.visualize.visualizer import Visualizer +def find_new_name(name: str, names: List[str]): + while True: + arr = name.split(' ') + num = arr[-1] + if num.isnumeric(): + arr[-1] = str(int(num) + 1) + name = " ".join(arr) + else: + name = f"{name} 2" + if name not in names: + return name class BaseConfig: @abstractmethod - def setup(self, pipeline: dai.Pipeline, device, names: List[str]) -> List[XoutBase]: + def setup(self, pipeline: dai.Pipeline, device: dai.Device, names: List[str]) -> List[XoutBase]: raise NotImplementedError() @@ -39,24 +50,12 @@ def __init__(self, output: Callable, self.visualizer_enabled = visualizer_enabled self.record_path = record_path - def find_new_name(self, name: str, names: List[str]): - while True: - arr = name.split(' ') - num = arr[-1] - if num.isnumeric(): - arr[-1] = str(int(num) + 1) - name = " ".join(arr) - else: - name = f"{name} 2" - if name not in names: - return name - def setup(self, pipeline: dai.Pipeline, device, names: List[str]) -> List[XoutBase]: xoutbase: XoutBase = self.output(pipeline, device) xoutbase.setup_base(self.callback) if xoutbase.name in names: # Stream name already exist, append a number to it - xoutbase.name = self.find_new_name(xoutbase.name, names) + xoutbase.name = find_new_name(xoutbase.name, names) names.append(xoutbase.name) recorder = None diff --git a/depthai_sdk/src/depthai_sdk/classes/packets.py b/depthai_sdk/src/depthai_sdk/classes/packets.py index d53d61bcd..0fa36c904 100644 --- a/depthai_sdk/src/depthai_sdk/classes/packets.py +++ b/depthai_sdk/src/depthai_sdk/classes/packets.py @@ -39,6 +39,7 @@ class _TrackingDetection(_Detection): class _TwoStageDetection(_Detection): nn_data: dai.NNData + class NNDataPacket: """ Contains only dai.NNData message @@ -50,6 +51,7 @@ def __init__(self, name: str, nn_data: dai.NNData): self.name = name self.msg = nn_data + class FramePacket: """ Contains only dai.ImgFrame message and cv2 frame, which is used by visualization logic. @@ -80,6 +82,7 @@ def __init__(self, self.color_frame = color_frame self.visualizer = visualizer + class DepthPacket(FramePacket): mono_frame: dai.ImgFrame diff --git a/depthai_sdk/src/depthai_sdk/components/camera_component.py b/depthai_sdk/src/depthai_sdk/components/camera_component.py index f3aaf679e..4122c9375 100644 --- a/depthai_sdk/src/depthai_sdk/components/camera_component.py +++ b/depthai_sdk/src/depthai_sdk/components/camera_component.py @@ -46,6 +46,7 @@ def __init__(self, """ super().__init__() self.out = self.Out(self) + self._pipeline = pipeline self.node: Optional[Union[dai.node.ColorCamera, dai.node.MonoCamera, dai.node.XLinkIn]] = None self.encoder: Optional[dai.node.VideoEncoder] = None @@ -279,6 +280,40 @@ def _config_camera_args(self, args: Dict): else: # Replay self.config_camera(fps=args.get('fps', None)) + def control_with_nn(self, detection_component: 'NNComponent', auto_focus=True, auto_exposure=True, debug=False): + """ + Control the camera AF/AE/AWB based on the object detection results. + + :param detection_component: NNComponent that will be used to control the camera + :param auto_focus: Enable auto focus to the object + :param auto_exposure: Enable auto exposure to the object + :param auto_white_balance: auto white balance to the object + """ + + if not auto_focus and not auto_exposure: + logging.error( + 'Attempted to control camera with NN, but both Auto-Focus and Auto-Exposure were disabled! Attempt ignored.' + ) + return + if 'NNComponent' not in str(type(detection_component)): + raise ValueError('nn_component must be an instance of NNComponent!') + if not detection_component._is_detector(): + raise ValueError('nn_component must be a object detection model (YOLO/MobileNetSSD based)!') + + from depthai_sdk.components.control_camera_with_nn import control_camera_with_nn + + control_camera_with_nn( + pipeline=self._pipeline, + camera_control=self.node.inputControl, + nn_output=detection_component.node.out, + resize_mode=detection_component._ar_resize_mode, + resolution=self.node.getResolution(), + nn_size = detection_component._size, + af=auto_focus, + ae=auto_exposure, + debug=debug + ) + def config_color_camera(self, interleaved: Optional[bool] = None, color_order: Union[None, dai.ColorCameraProperties.ColorOrder, str] = None, diff --git a/depthai_sdk/src/depthai_sdk/components/camera_helper.py b/depthai_sdk/src/depthai_sdk/components/camera_helper.py index 6fe6482c1..2fc802319 100644 --- a/depthai_sdk/src/depthai_sdk/components/camera_helper.py +++ b/depthai_sdk/src/depthai_sdk/components/camera_helper.py @@ -179,6 +179,18 @@ def get_res(resolutions: Dict[Any, Tuple[int,int]]): else: raise Exception('Camera sensor type unknown!', type) +def get_resolution_size( + resolution: Union[ + dai.ColorCameraProperties.SensorResolution, + dai.MonoCameraProperties.SensorResolution + ]) -> Tuple[int,int]: + if resolution in colorResolutions: + return colorResolutions[resolution] + elif resolution in monoResolutions: + return monoResolutions[resolution] + else: + raise Exception('Camera sensor resolution unknown!', resolution) + def getClosesResolution(sensor: dai.CameraFeatures, type: dai.CameraSensorType, width: Optional[int] = None, diff --git a/depthai_sdk/src/depthai_sdk/components/control_camera_with_nn.py b/depthai_sdk/src/depthai_sdk/components/control_camera_with_nn.py new file mode 100644 index 000000000..f804f4974 --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/components/control_camera_with_nn.py @@ -0,0 +1,69 @@ +import depthai as dai +from depthai_sdk.classes.enum import ResizeMode +from typing import Union, Tuple +from depthai_sdk.components.camera_helper import get_resolution_size +from pathlib import Path +from string import Template + +def control_camera_with_nn( + pipeline: dai.Pipeline, + camera_control: dai.Node.Input, + nn_output: dai.Node.Output, + resize_mode: ResizeMode, + resolution: Union[dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution], + nn_size: Tuple[int, int], + af: bool, + ae: bool, + debug: bool + ): + sensor_resolution = get_resolution_size(resolution) + # width / height (old ar) + sensor_ar = sensor_resolution[0] / sensor_resolution[1] + # NN ar (new ar) + nn_ar = nn_size[0] / nn_size[1] + + if resize_mode == ResizeMode.LETTERBOX: + padding = (sensor_ar - nn_ar) / 2 + if padding > 0: + init = f"xmin = 0; ymin = {-padding}; xmax = 1; ymax = {1 + padding}" + else: + init = f"xmin = {padding}; ymin = 0; xmax = {1 - padding}; ymax = 1" + elif resize_mode in [ResizeMode.CROP, ResizeMode.FULL_CROP]: + cropping = (1 - (nn_ar / sensor_ar)) / 2 + if cropping < 0: + init = f"xmin = 0; ymin = {-cropping}; xmax = 1; ymax = {1 + cropping}" + else: + init = f"xmin = {cropping}; ymin = 0; xmax = {1 - cropping}; ymax = 1" + else: # Stretch + init = f"xmin=0; ymin=0; xmax=1; ymax=1" + + + resize_str = f"new_xmin=xmin+width*det.xmin; new_ymin=ymin+height*det.ymin; new_xmax=xmin+width*det.xmax; new_ymax=ymin+height*det.ymax;" + denormalize = f"startx=int(new_xmin*{sensor_resolution[0]}); starty=int(new_ymin*{sensor_resolution[1]}); new_width=int((new_xmax-new_xmin)*{sensor_resolution[0]}); new_height=int((new_ymax-new_ymin)*{sensor_resolution[1]});" + control_str = '' + if ae: + control_str += f"control.setAutoExposureRegion(startx, starty, new_width, new_height);" + if af: + control_str += f"control.setAutoFocusRegion(startx, starty, new_width, new_height);" + + + script_node = pipeline.create(dai.node.Script) + script_node.setProcessor(dai.ProcessorType.LEON_CSS) # More stable + + with open(Path(__file__).parent / 'template_control_cam_with_nn.py', 'r') as file: + code = Template(file.read()).substitute( + DEBUG='' if debug else '#', + INIT=init, + RESIZE=resize_str, + DENORMALIZE=denormalize, + CONTROL=control_str + ) + script_node.setScript(code) + + if debug: + print(code) + + # Node linking: + # NN output -> Script -> Camera input + nn_output.link(script_node.inputs['detections']) + script_node.outputs['control'].link(camera_control) diff --git a/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py b/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py index 167cb007b..183bea93c 100644 --- a/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py +++ b/depthai_sdk/src/depthai_sdk/components/multi_stage_nn.py @@ -2,7 +2,7 @@ from pathlib import Path from string import Template from typing import Tuple, Optional, List - +from depthai_sdk.classes.enum import ResizeMode import depthai as dai from depthai_sdk.types import GenericNeuralNetwork @@ -14,6 +14,9 @@ def __init__(self, detection_node: GenericNeuralNetwork, # Object detection node high_res_frames: dai.Node.Output, size: Tuple[int, int], + frame_size: Tuple[int, int], + det_nn_size: Tuple[int, int], + resize_mode: ResizeMode, num_frames_pool: int = 20, ) -> None: """ @@ -21,10 +24,30 @@ def __init__(self, pipeline (dai.Pipeline): Pipeline object detection_node (GenericNeuralNetwork): Object detection NN high_res_frames (dai.Node.Output): Frames corresponding to the detection NN - size (Tuple[int, int]): Size of the frames. + size (Tuple[int, int]): NN input size of the second stage NN + frame_size (Tuple[int, int]): Frame size of the first (detection) stage NN + det_nn_size (Tuple[int, int]): NN input size of the first (detection) stage NN + resize_mode (ResizeMode): Resize mode that was used to resize frame for first (detection) stage debug (bool, optional): Enable debug mode. Defaults to False. num_frames_pool (int, optional): Number of frames to keep in the pool. Defaults to 20. """ + frame_size_ar = frame_size[0] / frame_size[1] + det_nn_size_ar = det_nn_size[0] / det_nn_size[1] + if resize_mode == ResizeMode.LETTERBOX: + padding = (frame_size_ar - det_nn_size_ar) / 2 + if padding > 0: + self.init = f"xmin = 0; ymin = {-padding}; xmax = 1; ymax = {1 + padding}" + else: + self.init = f"xmin = {padding}; ymin = 0; xmax = {1 - padding}; ymax = 1" + elif resize_mode in [ResizeMode.CROP, ResizeMode.FULL_CROP]: + cropping = (1 - (det_nn_size_ar / frame_size_ar)) / 2 + if cropping < 0: + self.init = f"xmin = 0; ymin = {-cropping}; xmax = 1; ymax = {1 + cropping}" + else: + self.init = f"xmin = {cropping}; ymin = 0; xmax = {1 - cropping}; ymax = 1" + else: # Stretch + self.init = f"xmin=0; ymin=0; xmax=1; ymax=1" + self.script: dai.node.Script = pipeline.create(dai.node.Script) self.script.setProcessor(dai.ProcessorType.LEON_CSS) # More stable self._size: Tuple[int, int] = size @@ -61,6 +84,7 @@ def configure(self, CHECK_LABELS=f"if det.label not in {str(whitelist_labels)}: continue" if whitelist_labels else "", WIDTH=str(self._size[0]), HEIGHT=str(self._size[1]), + INIT=self.init, SCALE_BB_XMIN=f"-{scale_bb[0] / 100}" if scale_bb else "", # % to float value SCALE_BB_YMIN=f"-{scale_bb[1] / 100}" if scale_bb else "", SCALE_BB_XMAX=f"+{scale_bb[0] / 100}" if scale_bb else "", diff --git a/depthai_sdk/src/depthai_sdk/components/nn_component.py b/depthai_sdk/src/depthai_sdk/components/nn_component.py index 8994a2d5e..5126d9f46 100644 --- a/depthai_sdk/src/depthai_sdk/components/nn_component.py +++ b/depthai_sdk/src/depthai_sdk/components/nn_component.py @@ -158,29 +158,24 @@ def __init__(self, # Link ImageManip output to NN node self.image_manip.out.link(self.node.input) elif self._is_multi_stage(): - # Calculate crop shape of the object detector. - # TODO: in the future also support stretching and letterboxing resize modes - frame_size = self._input._input.stream_size - nn_size = self._input._size - scale = frame_size[0] / nn_size[0], frame_size[1] / nn_size[1] - i = 0 if scale[0] < scale[1] else 1 - crop = int(scale[i] * nn_size[0]), int(scale[i] * nn_size[1]) - # Here, ImageManip will only crop the high-res frame to correct aspect ratio # (without resizing!) and it also acts as a buffer (by default, its pool size is set to 20). self.image_manip = pipeline.createImageManip() self.image_manip.setNumFramesPool(20) self._input._stream_input.link(self.image_manip.inputImage) + frame_full_size = self._input._input.stream_size if self._input._is_detector(): - self.image_manip.setResize(*crop) - self.image_manip.setMaxOutputFrameSize(crop[0] * crop[1] * 3) + self.image_manip.setMaxOutputFrameSize(frame_full_size[0] * frame_full_size[1] * 3) # Create script node, get HQ frames from input. self._multi_stage_nn = MultiStageNN(pipeline=pipeline, detection_node=self._input.node, high_res_frames=self.image_manip.out, size=self._size, + frame_size=frame_full_size, + det_nn_size=self._input._size, + resize_mode=self._input._ar_resize_mode, num_frames_pool=20) self._multi_stage_nn.out.link(self.node.input) # Cropped frames @@ -197,7 +192,7 @@ def __init__(self, # TODO pass frame on device, and just send config from host self.x_in = pipeline.createXLinkIn() self.x_in.setStreamName("input_queue") - self.x_in.setMaxDataSize(frame_size[0] * frame_size[1] * 3) + self.x_in.setMaxDataSize(frame_full_size[0] * frame_full_size[1] * 3) self.x_in.out.link(self.image_manip.inputImage) self.x_in_cfg = pipeline.createXLinkIn() @@ -387,7 +382,7 @@ def _change_resize_mode(self, mode: ResizeMode) -> None: mode (ResizeMode): Resize mode to use """ if self._is_multi_stage(): - return # We need high-res frames for multi-stage NN, so we can crop them later + return # We need high-res frames for multi-stage NN, so we can crop them later self._ar_resize_mode = mode @@ -454,7 +449,7 @@ def _parse_label(self, label: Union[str, int]) -> int: def config_tracker(self, tracker_type: Optional[dai.TrackerType] = None, - track_labels: Optional[List[int]] = None, + track_labels: Optional[List[Union[int, str]]] = None, assignment_policy: Optional[dai.TrackerIdAssignmentPolicy] = None, max_obj: Optional[int] = None, threshold: Optional[float] = None, @@ -618,6 +613,12 @@ def main(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutBase: Default output. Streams NN results and high-res frames that were downscaled and used for inferencing. Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). """ + if self._comp._is_multi_stage(): + input_nn = self._comp._input + if input_nn._input.encoder: + return self.encoded(pipeline=pipeline, device=device) + elif self._comp._input.encoder: + return self.encoded(pipeline=pipeline, device=device) if self._comp._is_multi_stage(): det_nn_out = StreamXout(id=self._comp._input.node.id, @@ -744,12 +745,33 @@ def encoded(self, pipeline: dai.Pipeline, device: dai.Device) -> XoutNnResults: Streams NN results and encoded frames (frames used for inferencing) Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). """ + if self._comp._is_multi_stage(): + input_nn = self._comp._input + + if input_nn._input.encoder is None: + raise Exception('Encoder not enabled for the input') + + det_nn_out = StreamXout(id=self._comp._input.node.id, + out=self._comp._input.node.out, + name=self._comp._input.name) + frames = StreamXout(id=input_nn._input.encoder.id, + out=input_nn._input.encoder.bitstream, + name=self._comp.name) + second_nn_out = StreamXout(self._comp.node.id, self._comp.node.out, name=self._comp.name) + + out = XoutTwoStage(det_nn=self._comp._input, + second_nn=self._comp, + frames=frames, + det_out=det_nn_out, + second_nn_out=second_nn_out, + device=device, + input_queue_name="input_queue" if self._comp.x_in else None) + + return self._comp._create_xout(pipeline, out) + if self._comp._input.encoder is None: raise Exception('Encoder not enabled for the input') - if self._comp._is_multi_stage(): - raise NotImplementedError('Encoded output not supported for 2-stage NNs at the moment.') - if self._comp._input._encoder_profile == dai.VideoEncoderProperties.Profile.MJPEG: out = XoutNnMjpeg( det_nn=self._comp, diff --git a/depthai_sdk/src/depthai_sdk/components/stereo_component.py b/depthai_sdk/src/depthai_sdk/components/stereo_component.py index 1c813b6d8..9f6f7608d 100644 --- a/depthai_sdk/src/depthai_sdk/components/stereo_component.py +++ b/depthai_sdk/src/depthai_sdk/components/stereo_component.py @@ -1,7 +1,7 @@ import logging import warnings from enum import Enum -from typing import Optional, Union, Any, Dict, Tuple +from typing import Optional, Union, Any, Dict, Tuple, List import cv2 import depthai as dai @@ -123,13 +123,23 @@ def __init__(self, # If not specified, default to 400P resolution for faster processing self._resolution = self._resolution or dai.MonoCameraProperties.SensorResolution.THE_400_P + # Always use 1200p for OAK-D-LR and OAK-D-SR + if self._device.getDeviceName() == 'OAK-D-LR': + self._resolution = dai.MonoCameraProperties.SensorResolution.THE_1200_P + if not self.left: self.left = CameraComponent(device, pipeline, 'left', self._resolution, self._fps, replay=self._replay) if not self.right: self.right = CameraComponent(device, pipeline, 'right', self._resolution, self._fps, replay=self._replay) - if 0 < len(device.getIrDrivers()): + # AR0234 outputs 1200p, so we need to resize it to 800p on RVC2 + if self._device.getDeviceName() == 'OAK-D-LR': + if isinstance(self.left, CameraComponent) and isinstance(self.right, CameraComponent): + self.left.config_color_camera(isp_scale=(2, 3)) + self.right.config_color_camera(isp_scale=(2, 3)) + + if self._get_ir_drivers(): laser = self._args.get('irDotBrightness', None) laser = laser if laser is not None else 800 if 0 < laser: @@ -347,19 +357,28 @@ def set_auto_ir(self, auto_mode: bool, continuous_mode: bool = False) -> None: auto_mode: Enable/disable auto IR. continuous_mode: Enable/disable continious mode. """ - warnings.warn('Auto IR is an experimental feature, which may not work as expected. ' - 'Please report any issues at https://discuss.luxonis.com/t/support/.') - self.ir_settings = { - 'auto_mode': auto_mode, - 'continuous_mode': continuous_mode - } - - def set_ir(self, dot_projector_brightness: int, flood_brightness: int): + if self._get_ir_drivers(): + self.ir_settings = { + 'auto_mode': auto_mode, + 'continuous_mode': continuous_mode + } + self.set_ir(0, 0) + + def set_ir(self, dot_projector_brightness: int = None, flood_brightness: int = None): """ Sets IR brightness and flood. """ - self._device.setIrLaserDotProjectorBrightness(dot_projector_brightness) - self._device.setIrFloodLightBrightness(flood_brightness) + if self._get_ir_drivers(): + if dot_projector_brightness is not None: + self._device.setIrLaserDotProjectorBrightness(dot_projector_brightness) + if flood_brightness is not None: + self._device.setIrFloodLightBrightness(flood_brightness) + + def _get_ir_drivers(self) -> List[Tuple[str, int, int]]: + """ + Returns a list of IR drivers. + """ + return self._device.getIrDrivers() def _get_disparity_factor(self, device: dai.Device) -> float: """ diff --git a/depthai_sdk/src/depthai_sdk/components/template_control_cam_with_nn.py b/depthai_sdk/src/depthai_sdk/components/template_control_cam_with_nn.py new file mode 100644 index 000000000..e153ff9cd --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/components/template_control_cam_with_nn.py @@ -0,0 +1,27 @@ +${INIT} +width=xmax-xmin; height=ymax-ymin; +while True: + detections = node.io['detections'].get() + if len(detections.detections) == 0: + continue + largest_det = detections.detections[0] + largest_size = 0 + for det in detections.detections: + size = (det.xmax - det.xmin) * (det.ymax - det.ymin) + if size > largest_size: + largest_size = size + largest_det = det + det = largest_det + ${DEBUG}node.warn(f"Detected ({det.xmin}, {det.ymin}) ({det.xmax}, {det.ymax})") + ${RESIZE} + if new_xmin < 0: new_xmin = 0.001 + if new_ymin < 0: new_ymin = 0.001 + if new_xmax > 1: new_xmax = 0.999 + if new_ymax > 1: new_ymax = 0.999 + + ${DEBUG}node.warn(f"New ({new_xmin}, {new_ymin}) ({new_xmax}, {new_ymax})") + ${DENORMALIZE} + ${DEBUG}node.warn(f"Denormalized START ({startx}, {starty}) Width: {new_width}, height: {new_height})") + control = CameraControl(1) + ${CONTROL} + node.io['control'].send(control) \ No newline at end of file diff --git a/depthai_sdk/src/depthai_sdk/components/template_multi_stage_script.py b/depthai_sdk/src/depthai_sdk/components/template_multi_stage_script.py index 0c812446c..3e0da3772 100644 --- a/depthai_sdk/src/depthai_sdk/components/template_multi_stage_script.py +++ b/depthai_sdk/src/depthai_sdk/components/template_multi_stage_script.py @@ -1,6 +1,10 @@ import time msgs = dict() cntr = 0 + +${INIT} +width=xmax-xmin; height=ymax-ymin; + def add_msg(msg, name, seq = None): global msgs if seq is None: @@ -32,10 +36,7 @@ def get_msgs(): return None def correct_bb(xmin,ymin,xmax,ymax): - if xmin < 0: xmin = 0.001 - if ymin < 0: ymin = 0.001 - if xmax > 1: xmax = 0.999 - if ymax > 1: ymax = 0.999 + return [xmin,ymin,xmax,ymax] while True: @@ -64,14 +65,19 @@ def correct_bb(xmin,ymin,xmax,ymax): for i, det in enumerate(dets.detections): ${CHECK_LABELS} cfg = ImageManipConfig() - bb = correct_bb( - det.xmin${SCALE_BB_XMIN}, - det.ymin${SCALE_BB_YMIN}, - det.xmax${SCALE_BB_XMAX}, - det.ymax${SCALE_BB_YMAX}, - ) - cfg.setCropRect(*bb) - ${DEBUG}node.warn(f"Sending {i + 1}. det. Det {det.xmin}, {det.ymin}, {det.xmax}, {det.ymax}") + # Change to fit the full-frame + new_xmin=xmin+width*det.xmin + new_ymin=ymin+height*det.ymin + new_xmax=xmin+width*det.xmax + new_ymax=ymin+height*det.ymax + + if new_xmin < 0: new_xmin = 0.001 + if new_ymin < 0: new_ymin = 0.001 + if new_xmax > 1: new_xmax = 0.999 + if new_ymax > 1: new_ymax = 0.999 + + cfg.setCropRect(new_xmin, new_ymin, new_xmax, new_ymax) + ${DEBUG}node.warn(f"Sending {i + 1}. det. Det {new_xmin}, {new_ymin}, {new_xmax}, {new_ymax}") cfg.setResize(${WIDTH}, ${HEIGHT}) cfg.setKeepAspectRatio(False) node.io['manip_cfg'].send(cfg) diff --git a/depthai_sdk/src/depthai_sdk/constants.py b/depthai_sdk/src/depthai_sdk/constants.py new file mode 100644 index 000000000..b0c8c3c02 --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/constants.py @@ -0,0 +1,15 @@ +CV2_HAS_GUI_SUPPORT = False + +try: + import cv2 + import re + + build_info = cv2.getBuildInformation() + gui_support_regex = re.compile(r'GUI: +([A-Z]+)') + gui_support_match = gui_support_regex.search(build_info) + if gui_support_match: + gui_support = gui_support_match.group(1) + if gui_support.upper() != 'NONE': + CV2_HAS_GUI_SUPPORT = True +except ImportError: + pass diff --git a/depthai_sdk/src/depthai_sdk/oak_camera.py b/depthai_sdk/src/depthai_sdk/oak_camera.py index 321eb3d10..a701d194f 100644 --- a/depthai_sdk/src/depthai_sdk/oak_camera.py +++ b/depthai_sdk/src/depthai_sdk/oak_camera.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Dict, Any, Optional, List, Union, Callable +from depthai_sdk import CV2_HAS_GUI_SUPPORT from depthai_sdk.visualize.visualizer import Visualizer try: @@ -399,7 +400,7 @@ def poll(self) -> Optional[int]: Returns: key pressed from cv2.waitKey, or None if """ - if cv2: + if CV2_HAS_GUI_SUPPORT: key = cv2.waitKey(1) if key == ord('q'): self._stop = True diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py b/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py index 9c1255706..addcac847 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/syncing.py @@ -1,3 +1,4 @@ +import threading from typing import Dict, List, Any, Optional @@ -21,23 +22,28 @@ class SequenceNumSync: def __init__(self, stream_num: int): self.msgs: Dict[str, Dict[str, Any]] = dict() self.stream_num: int = stream_num + self.lock = threading.Lock() def sync(self, seq_num: int, name: str, msg) -> Optional[Dict]: seq_num = str(seq_num) - if seq_num not in self.msgs: self.msgs[seq_num] = dict() - self.msgs[seq_num][name] = (msg) + with self.lock: + if seq_num not in self.msgs: + self.msgs[seq_num] = dict() - if self.stream_num == len(self.msgs[seq_num]): - # We have sequence num synced frames! - ret = self.msgs[seq_num] + self.msgs[seq_num][name] = (msg) - # Remove previous msgs - new_msgs = {} - for name, msg in self.msgs.items(): - if int(name) > int(seq_num): - new_msgs[name] = msg - self.msgs = new_msgs + if self.stream_num == len(self.msgs[seq_num]): + # We have sequence num synced frames! + ret = self.msgs[seq_num] + + # Remove previous msgs + new_msgs = {} + for name, msg in self.msgs.items(): + if int(name) > int(seq_num): + new_msgs[name] = msg + self.msgs = new_msgs + + return ret - return ret return None diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py index 6856ff8dc..e6e8239aa 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_disparity.py @@ -90,13 +90,17 @@ def on_callback(self, packet) -> None: def visualize(self, packet: DepthPacket): frame = packet.frame disparity_frame = (frame * self.multiplier).astype(np.uint8) + try: + mono_frame = packet.mono_frame.getCvFrame() + except AttributeError: + mono_frame = None stereo_config = self._visualizer.config.stereo if self.use_wls_filter or stereo_config.wls_filter: self.wls_filter.setLambda(self.wls_lambda or stereo_config.wls_lambda) self.wls_filter.setSigmaColor(self.wls_sigma or stereo_config.wls_sigma) - disparity_frame = self.wls_filter.filter(disparity_frame, packet.mono_frame.getCvFrame()) + disparity_frame = self.wls_filter.filter(disparity_frame, mono_frame) colorize = self.colorize or stereo_config.colorize if self.colormap is not None: @@ -105,13 +109,16 @@ def visualize(self, packet: DepthPacket): colormap = stereo_config.colormap colormap[0] = [0, 0, 0] # Invalidate pixels 0 to be black + if mono_frame is not None and disparity_frame.ndim == 2 and mono_frame.ndim == 3: + disparity_frame = disparity_frame[..., np.newaxis] + if colorize == StereoColor.GRAY: packet.frame = disparity_frame elif colorize == StereoColor.RGB: packet.frame = cv2.applyColorMap(disparity_frame, colormap) elif colorize == StereoColor.RGBD: packet.frame = cv2.applyColorMap( - (disparity_frame * 0.5 + packet.mono_frame.getCvFrame() * 0.5).astype(np.uint8), colormap + (disparity_frame * 1.0 + mono_frame * 0.5).astype(np.uint8), colormap ) if self._visualizer.config.output.clickable: diff --git a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py index 3e3561338..64641493c 100644 --- a/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py +++ b/depthai_sdk/src/depthai_sdk/oak_outputs/xout/xout_nn.py @@ -1,3 +1,4 @@ +import threading from typing import List, Union, Dict, Any, Optional, Tuple import depthai as dai @@ -293,6 +294,8 @@ def __init__(self, self.input_queue = None self.input_cfg_queue = None + self.lock = threading.Lock() + def xstreams(self) -> List[StreamXout]: return [self.frames, self.nn_results, self.second_nn_out] @@ -398,11 +401,12 @@ def new_msg(self, name: str, msg: dai.Buffer) -> None: # if int(s) <= int(seq): # del self.msgs[s] - new_msgs = {} - for name, msg in self.msgs.items(): - if int(name) > int(seq): - new_msgs[name] = msg - self.msgs = new_msgs + with self.lock: + new_msgs = {} + for name, msg in self.msgs.items(): + if int(name) > int(seq): + new_msgs[name] = msg + self.msgs = new_msgs def add_detections(self, seq: str, dets: dai.ImgDetections): # Used to match the scaled bounding boxes by the 2-stage NN script node diff --git a/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py b/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py index b637a01bd..725729ce5 100644 --- a/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py +++ b/depthai_sdk/src/depthai_sdk/readers/videocap_reader.py @@ -28,7 +28,8 @@ def __init__(self, path: Path, loop: bool = False) -> None: self._is_looped = loop if path.is_file(): - self.videos[path.stem.lower()] = { + stream_name = path.stem if (path.stem in ['left', 'right']) else 'color' + self.videos[stream_name] = { 'reader': cv2.VideoCapture(str(path)), 'socket': dai.CameraBoardSocket.CAM_A } diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py b/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py index d6f2b7330..0582736bd 100644 --- a/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py +++ b/depthai_sdk/src/depthai_sdk/recorders/video_recorder.py @@ -50,7 +50,7 @@ def update(self, path: Path, device: dai.Device, xouts: List['XoutFrames']): else: try: from .video_writers.av_writer import AvWriter - self._writers[xout_name] = AvWriter(self.path, xout_name, fourcc, xout.fps) + self._writers[xout_name] = AvWriter(self.path, xout_name, fourcc, xout.fps, xout._frame_shape) except Exception as e: # TODO here can be other errors, not only import error logging.warning(f'Exception while creating AvWriter: {e}.' diff --git a/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py b/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py index caec89b19..62ed798aa 100644 --- a/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py +++ b/depthai_sdk/src/depthai_sdk/recorders/video_writers/av_writer.py @@ -1,48 +1,128 @@ -from datetime import timedelta +import os from fractions import Fraction from pathlib import Path +from typing import Tuple, Union import depthai as dai +import numpy as np -from depthai_sdk.recorders.video_writers import BaseWriter from depthai_sdk.recorders.video_writers.utils import create_writer_dir +from .base_writer import BaseWriter + +START_CODE_PREFIX = b"\x00\x00\x01" +KEYFRAME_NAL_TYPE = 5 +NAL_TYPE_BITS = 5 # nal unit type is encoded in lower 5 bits + + +def is_keyframe(encoded_frame: np.array) -> bool: + """ + Check if encoded frame is a keyframe. + Args: + encoded_frame: Encoded frame. + + Returns: + True if encoded frame is a keyframe, False otherwise. + """ + byte_stream = bytearray(encoded_frame) + size = len(byte_stream) + + pos = 0 + while pos < size: + retpos = byte_stream.find(START_CODE_PREFIX, pos) + if retpos == -1: + return False + + # Skip start code + pos = retpos + 3 + + # Extract the first 5 bits + type_ = byte_stream[pos] >> 3 + + if type_ == KEYFRAME_NAL_TYPE: + return True + + return False class AvWriter(BaseWriter): - def __init__(self, path: Path, name: str, fourcc: str, fps: float): + def __init__(self, path: Path, name: str, fourcc: str, fps: float, frame_shape: Tuple[int, int]): + """ + Args: + path: Path to the folder where the file will be created. + name: Name of the file without extension. + fourcc: Stream codec. + fps: Frames per second of the stream. + frame_shape: Width and height of the frames. + """ super().__init__(path, name) self.start_ts = None + self.frame_shape = frame_shape + self._fps = fps self._fourcc = fourcc + self._stream = None + + def _create_stream(self, fourcc: str, fps: float) -> None: + """ + Create stream in file with given fourcc and fps, works in-place. - def _create_stream(self, fourcc, fps) -> None: - """Create stream in file with given fourcc and fps, works in-place.""" - stream = self._file.add_stream(fourcc, rate=int(fps)) - stream.time_base = Fraction(1, 1000 * 1000) # Microseconds + Args: + fourcc: Stream codec. + fps: Frames per second of the stream. + """ + self._stream = self._file.add_stream(fourcc, rate=int(fps)) + self._stream.time_base = Fraction(1, 1000 * 1000) # Microseconds # We need to set pixel format for MJEPG, for H264/H265 it's yuv420p by default if fourcc == 'mjpeg': - stream.pix_fmt = 'yuvj420p' + self._stream.pix_fmt = 'yuvj420p' - def create_file_for_buffer(self, subfolder: str, buf_name: str): # independent of type of frames + if self.frame_shape is not None: + self._stream.width = self.frame_shape[0] + self._stream.height = self.frame_shape[1] + + def create_file_for_buffer(self, subfolder: str, buf_name: str) -> None: # independent of type of frames self.create_file(subfolder) - def create_file(self, subfolder: str): + def create_file(self, subfolder: str) -> None: + """ + Create file for writing frames. + + Args: + subfolder: Subfolder relative to the main folder where the file will be created. + """ path_to_file = create_writer_dir(self.path / subfolder, self.name, 'mp4') self._create_file(path_to_file) - def _create_file(self, path_to_file: str): + def _create_file(self, path_to_file: str) -> None: + """ + Create av container for writing frames. + + Args: + path_to_file: Path to the file. + """ global av - import av as av - self._file = av.open(path_to_file, 'w') + import av + self._file = av.open(str(Path(path_to_file).with_suffix(f'.{self._fourcc}')), 'w') self._create_stream(self._fourcc, self._fps) def write(self, frame: dai.ImgFrame) -> None: + """ + Write packet bytes to h264 file. + + Args: + frame: ImgFrame from depthai pipeline. + """ if self._file is None: self.create_file(subfolder='') - packet = av.Packet(frame.getData()) # Create new packet with byte array + frame_data = frame.getData() + + if self.start_ts is None and not is_keyframe(frame_data): + return + + packet = av.Packet(frame_data) # Create new packet with byte array # Set frame timestamp if self.start_ts is None: @@ -51,9 +131,50 @@ def write(self, frame: dai.ImgFrame) -> None: ts = int((frame.getTimestampDevice() - self.start_ts).total_seconds() * 1e6) # To microsec packet.dts = ts packet.pts = ts - self._file.mux_one(packet) # Mux the Packet into container - def close(self): + def close(self) -> None: + """ + Close the file and remux it to mp4. + """ if self._file is not None: + p = self._stream.encode(None) + self._file.mux(p) self._file.close() + + # Remux the stream to finalize the output file + self.remux_video(str(self._file.name)) + + def remux_video(self, input_file: Union[Path, str]) -> None: + """ + Remuxes h264 file to mp4. + + Args: + input_file: path to h264 file. + """ + + mp4_file = str(Path(input_file).with_suffix('.mp4')) + + if input_file == mp4_file: + mp4_file = str(Path(input_file).with_suffix('.remuxed.mp4')) + + with av.open(mp4_file, "w", format="mp4") as output_container, \ + av.open(input_file, "r", format=self._fourcc) as input_container: + input_stream = input_container.streams[0] + output_stream = output_container.add_stream(template=input_stream, rate=self._fps) + + if self.frame_shape: + output_stream.width = self.frame_shape[0] + output_stream.height = self.frame_shape[1] + + frame_time = (1 / self._fps) * input_stream.time_base.denominator + for i, packet in enumerate(input_container.demux(input_stream)): + packet.dts = i * frame_time + packet.pts = i * frame_time + packet.stream = output_stream + output_container.mux_one(packet) + + os.remove(input_file) + + if Path(mp4_file).suffix == '.remuxed.mp4': + os.rename(mp4_file, input_file) diff --git a/depthai_sdk/src/depthai_sdk/trigger_action/actions/record_action.py b/depthai_sdk/src/depthai_sdk/trigger_action/actions/record_action.py index e78452d86..e249fe33d 100644 --- a/depthai_sdk/src/depthai_sdk/trigger_action/actions/record_action.py +++ b/depthai_sdk/src/depthai_sdk/trigger_action/actions/record_action.py @@ -96,7 +96,7 @@ def _run(self): # Call on_finish_callback if it is specified if self.on_finish_callback is not None: - self.on_finish_callback(self.path) + self.on_finish_callback(str(self.path / subfolder)) def activate(self): # Setup for the current recording diff --git a/depthai_sdk/src/depthai_sdk/utils.py b/depthai_sdk/src/depthai_sdk/utils.py index f2b835496..516642637 100644 --- a/depthai_sdk/src/depthai_sdk/utils.py +++ b/depthai_sdk/src/depthai_sdk/utils.py @@ -429,7 +429,7 @@ def _create_config() -> None: config_file = Path.home().joinpath('.depthai_sdk', 'config.json') default_config = { 'sentry': True, - 'sentry_dsn': 'https://981545d5effd480d883f3ff0b1306e49@o1095304.ingest.sentry.io/4504685274791936' + 'sentry_dsn': 'https://67bc97fb3ee947bf90d83c892eaf19fe@sentry.luxonis.com/3' } if not config_file.exists(): config_file.write_text(json.dumps(default_config)) diff --git a/depthai_sdk/src/depthai_sdk/visualize/configs.py b/depthai_sdk/src/depthai_sdk/visualize/configs.py index 9d1e43a5c..8ee2b43d6 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/configs.py +++ b/depthai_sdk/src/depthai_sdk/visualize/configs.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field from enum import IntEnum -from typing import Tuple +from typing import Tuple, Optional import numpy as np try: @@ -81,8 +81,10 @@ class TextConfig: font_thickness: int = 2 font_position: TextPosition = TextPosition.TOP_LEFT - bg_transparency: float = 0.5 - bg_color: Tuple[int, int, int] = (0, 0, 0) + background_color: Optional[Tuple[int, int, int]] = None + background_transparency: float = 0.5 + + outline_color: Tuple[int, int, int] = (0, 0, 0) line_type: int = 16 # cv2.LINE_AA diff --git a/depthai_sdk/src/depthai_sdk/visualize/objects.py b/depthai_sdk/src/depthai_sdk/visualize/objects.py index ba7edf607..8fe4991ee 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/objects.py +++ b/depthai_sdk/src/depthai_sdk/visualize/objects.py @@ -296,6 +296,9 @@ def __init__(self, detections: List[Union[ImgDetection, dai.Tracklet]], normalizer: BoundingBox, label_map: List[Tuple[str, Tuple]] = None, + label_color: Tuple[int, int, int] = None, + label_background_color: Tuple[int, int, int] = None, + label_background_transparency: float = None, spatial_points: List[dai.Point3f] = None, is_spatial=False, parent_bbox: Union[np.ndarray, Tuple[int, int, int, int]] = None): @@ -312,6 +315,9 @@ def __init__(self, self.detections = detections self.normalizer = normalizer self.label_map = label_map + self.label_color = label_color + self.label_background_color = label_background_color + self.label_background_transparency = label_background_transparency self.spatial_points = spatial_points self.is_spatial = is_spatial self.parent_bbox = parent_bbox @@ -331,7 +337,10 @@ def serialize(self) -> dict: 'detections': [{ 'bbox': bbox.to_tuple(frame_shape=self.frame_shape) if isinstance(bbox, BoundingBox) else bbox, 'label': label, - 'color': color + 'color': color, + 'label_color': self.label_color, + 'label_background_color': self.label_background_color, + 'label_background_transparency': self.label_background_transparency } for bbox, label, color in list(self.get_detections())] } if len(self._children) > 0: @@ -385,12 +394,17 @@ def prepare(self) -> 'VisDetections': # Add spatial coordinates self.add_child(VisText(f'{spatial_coords.x}\n{spatial_coords.y}\n{spatial_coords.z}', + color=self.label_color or color, bbox=normalized_bbox, - position=TextPosition.BOTTOM_RIGHT)) + position=TextPosition.BOTTOM_RIGHT, + background_color=self.label_background_color, + background_transparency=self.label_background_transparency)) if cv2 and not detection_config.hide_label and len(label) > 0: # Place label in the bounding box - self.add_child(VisText(text=label.capitalize(), bbox=normalized_bbox, + self.add_child(VisText(text=label.capitalize(), + color=self.label_color or color, + bbox=normalized_bbox, position=detection_config.label_position, padding=detection_config.label_padding)) @@ -438,6 +452,8 @@ def __init__(self, color: Tuple[int, int, int] = None, thickness: int = None, outline: bool = True, + background_color: Tuple[int, int, int] = None, + background_transparency: float = 0.5, bbox: Union[np.ndarray, Tuple[int, int, int, int], BoundingBox] = None, position: TextPosition = TextPosition.TOP_LEFT, padding: int = 10): @@ -456,6 +472,8 @@ def __init__(self, color: Text color. thickness: Font thickness. outline: Enable outline if set to True, disable otherwise. + background_color: Background color. + background_transparency: Background transparency. bbox: Bounding box where to place text. position: Position w.r.t. to frame (or bbox if is set). padding: Padding. @@ -467,6 +485,8 @@ def __init__(self, self.color = color self.thickness = thickness self.outline = outline + self.background_color = background_color + self.background_transparency = background_transparency self.bbox = bbox self.position = position self.padding = padding @@ -479,6 +499,8 @@ def serialize(self): 'color': self.color, 'thickness': self.thickness, 'outline': self.outline, + 'background_color': self.background_color, + 'background_transparency': self.background_transparency } def prepare(self) -> 'VisText': @@ -505,7 +527,6 @@ def draw(self, frame: np.ndarray) -> None: # Extract shape of the bbox if exists if self.bbox is not None: - # shape = self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1] tl, br = self.bbox.denormalize(frame.shape) shape = br[0] - tl[0], br[1] - tl[1] else: @@ -519,11 +540,28 @@ def draw(self, frame: np.ndarray) -> None: font_thickness = max(1, int(font_scale * 2)) \ if text_config.auto_scale else self.thickness or text_config.font_thickness - dy = cv2.getTextSize(self.text, text_config.font_face, font_scale, font_thickness)[0][1] + 10 + dx, dy = cv2.getTextSize(self.text, text_config.font_face, font_scale, font_thickness)[0] + dy += 10 for line in self.text.splitlines(): y = self.coords[1] + background_color = self.background_color or text_config.background_color + background_transparency = self.background_transparency or text_config.background_transparency + if background_color is not None: + img_with_background = cv2.rectangle(img=frame.copy(), + pt1=(self.coords[0], y - dy), + pt2=(self.coords[0] + dx, y + 10), + color=background_color, + thickness=-1) + # take transparency into account + cv2.addWeighted(src1=img_with_background, + alpha=background_transparency, + src2=frame, + beta=1 - background_transparency, + gamma=0, + dst=frame) + if self.outline: # Background cv2.putText(img=frame, @@ -531,7 +569,7 @@ def draw(self, frame: np.ndarray) -> None: org=self.coords, fontFace=text_config.font_face, fontScale=font_scale, - color=text_config.bg_color, + color=text_config.outline_color, thickness=font_thickness + 1, lineType=text_config.line_type) diff --git a/depthai_sdk/src/depthai_sdk/visualize/visualizer.py b/depthai_sdk/src/depthai_sdk/visualize/visualizer.py index b4c257f37..67d214f69 100644 --- a/depthai_sdk/src/depthai_sdk/visualize/visualizer.py +++ b/depthai_sdk/src/depthai_sdk/visualize/visualizer.py @@ -71,6 +71,7 @@ def add_bbox(self, Args: bbox: Bounding box. label: Label for the detection. + thickness: Bounding box thickness. color: Bounding box color (RGB). bbox_style: Bounding box style (one of depthai_sdk.visualize.configs.BboxStyle). @@ -90,6 +91,9 @@ def add_detections(self, normalizer: BoundingBox = None, label_map: List[Tuple[str, Tuple]] = None, spatial_points: List[dai.Point3f] = None, + label_color: Tuple[int, int, int] = None, + label_background_color: Tuple[int, int, int] = None, + label_background_transparency: float = None, is_spatial=False, bbox: Union[np.ndarray, Tuple[int, int, int, int]] = None, ) -> 'Visualizer': @@ -101,8 +105,12 @@ def add_detections(self, normalizer: Normalizer object. label_map: List of tuples (label, color). spatial_points: List of spatial points. None if not spatial. + label_color: Color for the label. + label_background_color: Color for the label background. + label_background_transparency: Transparency for the label background. is_spatial: Flag that indicates if the detections are spatial. bbox: Bounding box, if there's a detection inside a bounding box. + Returns: self """ @@ -110,6 +118,9 @@ def add_detections(self, normalizer=normalizer, label_map=label_map, spatial_points=spatial_points, + label_color=label_color, + label_background_color=label_background_color, + label_background_transparency=label_background_transparency, is_spatial=is_spatial, parent_bbox=bbox) self.add_object(detection_overlay) @@ -122,6 +133,8 @@ def add_text(self, color: Tuple[int, int, int] = None, thickness: int = None, outline: bool = True, + background_color: Tuple[int, int, int] = None, + background_transparency: float = 0.5, bbox: Union[np.ndarray, Tuple[int, ...], BoundingBox] = None, position: TextPosition = TextPosition.TOP_LEFT, padding: int = 10) -> 'Visualizer': @@ -135,6 +148,8 @@ def add_text(self, color: Color of the text. thickness: Thickness of the text. outline: Flag that indicates if the text should be outlined. + background_color: Background color. + background_transparency: Background transparency. bbox: Bounding box. position: Position. padding: Padding. @@ -148,6 +163,8 @@ def add_text(self, color=color, thickness=thickness, outline=outline, + background_color=background_color, + background_transparency=background_transparency, bbox=bbox, position=position, padding=padding) @@ -368,8 +385,9 @@ def text(self, font_scale: float = None, font_thickness: int = None, font_position: TextPosition = None, - bg_transparency: float = None, - bg_color: Tuple[int, int, int] = None, + background_transparency: float = None, + background_color: Tuple[int, int, int] = None, + outline_color: Tuple[int, int, int] = None, line_type: int = None, auto_scale: bool = None) -> 'Visualizer': """ @@ -382,8 +400,9 @@ def text(self, font_scale: Font scale. font_thickness: Font thickness. font_position: Font position. - bg_transparency: Text background transparency. - bg_color: Text background color. + background_transparency: Text background transparency. + background_color: Text background color. + outline_color: Outline color. line_type: Line type (from cv2). auto_scale: Flag that indicates if the font scale should be automatically adjusted.