diff --git a/.vscode/launch.json b/.vscode/launch.json index 4594a159..98bebc0a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -851,6 +851,69 @@ } }, //#endregion h264_video_decode + + //#region h264_endoscopy_tool_tracking_distributed + { + "name": "(gdb) h264_endoscopy_tool_tracking_distributed/cpp (all fragments)", + "type": "cppdbg", + "request": "launch", + "preLaunchTask": "Build h264_endoscopy_tool_tracking_distributed", + "program": "${workspaceFolder}/build/h264_endoscopy_tool_tracking_distributed/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/h264_endoscopy_tool_tracking_distributed", + "environment": [ + { + "name": "HOLOSCAN_INPUT_PATH", + "value": "${env:HOLOHUB_DATA_DIR}/endoscopy" + } + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}/build/h264_endoscopy_tool_tracking_distributed/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp", + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "presentation": { + "hidden": false, + "group": "h264_endoscopy_tool_tracking_distributed", + "order": 1 + } + }, + { + "name": "(debugpy) h264_endoscopy_tool_tracking_distributed/python (all fragments)", + "type": "debugpy", + "request": "launch", + "preLaunchTask": "Build h264_endoscopy_tool_tracking_distributed", + "program": "${workspaceFolder}/applications/h264/h264_endoscopy_tool_tracking_distributed/python/h264_endoscopy_tool_tracking_distributed.py", + "cwd": "${workspaceFolder}/build/h264_endoscopy_tool_tracking_distributed/", + "env": { + "PYTHONPATH": "${workspaceFolder}/build/h264_endoscopy_tool_tracking_distributed/python/lib:${env:PYTHONPATH}", + "HOLOSCAN_INPUT_PATH": "${env:HOLOHUB_DATA_DIR}/endoscopy" + }, + "args": [ + "--data", + "${env:HOLOHUB_DATA_DIR}/endoscopy" + ], + "presentation": { + "hidden": true, + } + }, + { + "name": "(pythoncpp) h264_endoscopy_tool_tracking_distributed/python (all fragments)", + "type": "pythoncpp", + "request": "launch", + "pythonLaunchName": "(debugpy) h264_endoscopy_tool_tracking_distributed/python (all fragments)", + "cppConfig": "default (gdb) Attach", + "presentation": { + "hidden": false, + "group": "h264_endoscopy_tool_tracking_distributed", + "order": 2 + } + }, + //#endregion h264_endoscopy_tool_tracking_distributed //#region holoviz { "name": "(gdb) Holoviz examples", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index a53a76a1..1c077297 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -289,6 +289,32 @@ "problemMatcher": [], "detail": "CMake template build task" }, + { + "type": "shell", + "label": "Build h264_endoscopy_tool_tracking_distributed", + "command": "./run", + "args": [ + "build", + "h264_endoscopy_tool_tracking_distributed", + "--type", + "debug" + ], + "options": { + "cwd": "${env:WORKSPACE_DIR}" + }, + "group": "build", + "problemMatcher": [], + "detail": "CMake template build task", + "presentation": { + "echo": true, + "reveal": "silent", + "focus": true, + "panel": "dedicated", + "showReuseMessage": false, + "clear": true, + "group": "h264_endoscopy_tool_tracking_distributed" + } + }, { "type": "shell", "label": "Build h264_video_decode", diff --git a/applications/h264/CMakeLists.txt b/applications/h264/CMakeLists.txt index ccc65073..1c19e135 100644 --- a/applications/h264/CMakeLists.txt +++ b/applications/h264/CMakeLists.txt @@ -20,5 +20,11 @@ add_holohub_application(h264_endoscopy_tool_tracking DEPENDS 
tool_tracking_postprocessor ) +add_holohub_application(h264_endoscopy_tool_tracking_distributed DEPENDS + OPERATORS video_encoder + tensor_to_video_buffer + lstm_tensor_rt_inference + tool_tracking_postprocessor) + add_holohub_application(h264_video_decode DEPENDS - OPERATORS video_decoder video_read_bitstream) \ No newline at end of file + OPERATORS video_decoder video_read_bitstream) diff --git a/applications/h264/h264_endoscopy_tool_tracking/python/CMakeLists.txt b/applications/h264/h264_endoscopy_tool_tracking/python/CMakeLists.txt index 4cedd469..bb229b36 100644 --- a/applications/h264/h264_endoscopy_tool_tracking/python/CMakeLists.txt +++ b/applications/h264/h264_endoscopy_tool_tracking/python/CMakeLists.txt @@ -15,35 +15,36 @@ cmake_minimum_required(VERSION 3.20) find_package(holoscan 2.1.0 REQUIRED CONFIG - PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install") + PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install") # Enable the operators -add_library(nvidia_nim_imaging INTERFACE) -target_link_libraries(nvidia_nim_imaging INTERFACE - holoscan::core - holoscan::ops::holoviz - holoscan::ops::lstm_tensor_rt_inference - holoscan::ops::tensor_to_video_buffer - holoscan::ops::tool_tracking_postprocessor) +add_library(h264_endoscopy_tool_tracking_python INTERFACE) +target_link_libraries(h264_endoscopy_tool_tracking_python + INTERFACE + holoscan::core + holoscan::ops::holoviz + holoscan::ops::lstm_tensor_rt_inference + holoscan::ops::tensor_to_video_buffer + holoscan::ops::tool_tracking_postprocessor) # Add testing if(BUILD_TESTING) add_test(NAME h264_endoscopy_tool_tracking_python_test - COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/h264_endoscopy_tool_tracking.py - --config ${CMAKE_CURRENT_SOURCE_DIR}/h264_endoscopy_tool_tracking.yaml - --data ${HOLOHUB_DATA_DIR}/endoscopy - WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/h264_endoscopy_tool_tracking.py + --config ${CMAKE_CURRENT_SOURCE_DIR}/h264_endoscopy_tool_tracking.yaml + --data ${HOLOHUB_DATA_DIR}/endoscopy + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) set_property(TEST h264_endoscopy_tool_tracking_python_test PROPERTY ENVIRONMENT - "PYTHONPATH=${GXF_LIB_DIR}/../python/lib:${CMAKE_BINARY_DIR}/python/lib") + "PYTHONPATH=${GXF_LIB_DIR}/../python/lib:${CMAKE_BINARY_DIR}/python/lib") set_tests_properties(h264_endoscopy_tool_tracking_python_test PROPERTIES - PASS_REGULAR_EXPRESSION "Deactivating Graph" - FAIL_REGULAR_EXPRESSION "[^a-z]Error;ERROR;Failed") + PASS_REGULAR_EXPRESSION "Deactivating Graph" + FAIL_REGULAR_EXPRESSION "[^a-z]Error;ERROR;Failed") # For aarch64 LD_LIBRARY_PATH needs to be set if(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL arm64) set_property(TEST h264_endoscopy_tool_tracking_python_test APPEND PROPERTY ENVIRONMENT - "LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/tegra/") + "LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/tegra/") endif() endif() diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/CMakeLists.txt b/applications/h264/h264_endoscopy_tool_tracking_distributed/CMakeLists.txt new file mode 100644 index 00000000..ec1107aa --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/CMakeLists.txt @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.20) +project(h264_endoscopy_tool_tracking_distributed_apps LANGUAGES NONE) + +add_subdirectory(cpp) +add_subdirectory(python) diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/README.md b/applications/h264/h264_endoscopy_tool_tracking_distributed/README.md new file mode 100644 index 00000000..01c641d5 --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/README.md @@ -0,0 +1,82 @@ +# Distributed H.264 Endoscopy Tool Tracking Application + +This application is similar to the [H.264 Endoscopy Tool Tracking](../h264_endoscopy_tool_tracking/) application, but this distributed version divides the application into three fragments: + +1. Video Input: reads video input from a pre-recorded video file. +2. Inference: runs inference using the LSTM model and post-processes the results. +3. Visualization: displays the input video and the inference results. + + +## Requirements + +This application is configured to use an H.264 elementary stream from the endoscopy sample data as input. + +### Data + +[📦️ (NGC) Sample App Data for AI-based Endoscopy Tool Tracking](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara-holoscan/resources/holoscan_endoscopy_sample_data) + +The data is automatically downloaded when building the application. + +## Building and Running the Distributed H.264 Endoscopy Tool Tracking Application + +Build and run the application from the top-level HoloHub directory: + +### C++ + +```bash +# Start the application with all three fragments +./dev_container build_and_run h264_endoscopy_tool_tracking_distributed --docker_file applications/h264/Dockerfile --language cpp + +# Use the following commands to run the same application in three separate processes: +# Start the application with the video_in fragment +./dev_container build_and_run h264_endoscopy_tool_tracking_distributed --docker_file applications/h264/Dockerfile --language cpp --run_args "--driver --worker --fragments video_in --address :10000 --worker-address :10001" +# Start the application with the inference fragment +./dev_container build_and_run h264_endoscopy_tool_tracking_distributed --docker_file applications/h264/Dockerfile --language cpp --run_args "--worker --fragments inference --address :10000 --worker-address :10002" +# Start the application with the visualization fragment +./dev_container build_and_run h264_endoscopy_tool_tracking_distributed --docker_file applications/h264/Dockerfile --language cpp --run_args "--worker --fragments viz --address :10000 --worker-address :10003" +``` + +### Python + +```bash +# Start the application with all three fragments +./dev_container build_and_run h264_endoscopy_tool_tracking_distributed --docker_file applications/h264/Dockerfile --language python + +# Use the following commands to run the same application in three separate processes: +# Start the application with the video_in fragment +./dev_container build_and_run h264_endoscopy_tool_tracking_distributed --docker_file applications/h264/Dockerfile --language python --run_args "--driver --worker --fragments
video_in --address :10000 --worker-address :10001" +# Start the application with the inference fragment +./dev_container build_and_run h264_endoscopy_tool_tracking_distributed --docker_file applications/h264/Dockerfile --language python --run_args "--worker --fragments inference --address :10000 --worker-address :10002" +# Start the application with the visualization fragment +./dev_container build_and_run h264_endoscopy_tool_tracking_distributed --docker_file applications/h264/Dockerfile --language python --run_args "--worker --fragments viz --address :10000 --worker-address :10003" +``` + +Important: on aarch64, applications also need the tegra folder mounted inside the container, and +the `LD_LIBRARY_PATH` environment variable must be updated to include the +tegra folder path. + +Open the [Dockerfile](../Dockerfile) and uncomment line 66: + +```bash +# Uncomment the following line for aarch64 support +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/aarch64-linux-gnu/tegra/ +``` + + +## Dev Container + +To start the VS Code Dev Container, run the following command from the root directory of HoloHub: + +```bash +./dev_container vscode h264 +``` + +### VS Code Launch Profiles + +#### C++ + +Use the **(gdb) h264_endoscopy_tool_tracking_distributed/cpp (all fragments)** launch profile to run and debug the C++ application. + +#### Python + +Use the **(pythoncpp) h264_endoscopy_tool_tracking_distributed/python (all fragments)** launch profile to run and debug the Python application. diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/CMakeLists.txt b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/CMakeLists.txt new file mode 100644 index 00000000..67c87f44 --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/CMakeLists.txt @@ -0,0 +1,59 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
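+# Build configuration for the C++ distributed application: it links against the Holoscan SDK and HoloHub operators, copies endoscopy_tool_tracking.yaml next to the binary, and downloads the endoscopy sample data.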
+ +cmake_minimum_required(VERSION 3.20) +project(h264_endoscopy_tool_tracking_distributed CXX) + +find_package(holoscan 2.5 REQUIRED CONFIG + PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install") + +add_executable(h264_endoscopy_tool_tracking_distributed + main.cpp +) + +target_link_libraries(h264_endoscopy_tool_tracking_distributed + PRIVATE + holoscan::core + holoscan::ops::gxf_codelet + holoscan::ops::format_converter + holoscan::ops::holoviz + holoscan::ops::video_encoder + holoscan::ops::tensor_to_video_buffer + lstm_tensor_rt_inference + tool_tracking_postprocessor +) + +# Copy the config to the binary directory +add_custom_target(h264_endoscopy_tool_tracking_distributed_yaml + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_CURRENT_SOURCE_DIR}/endoscopy_tool_tracking.yaml" ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS "endoscopy_tool_tracking.yaml" + BYPRODUCTS "endoscopy_tool_tracking.yaml" +) +add_dependencies(h264_endoscopy_tool_tracking_distributed h264_endoscopy_tool_tracking_distributed_yaml) + +# Default to download datasets +option(HOLOHUB_DOWNLOAD_DATASETS "Download datasets" ON) + +# Download the endoscopy sample data +if(HOLOHUB_DOWNLOAD_DATASETS) + include(holoscan_download_data) + holoscan_download_data(endoscopy + URL https://api.ngc.nvidia.com/v2/resources/nvidia/clara-holoscan/holoscan_endoscopy_sample_data/versions/20230222/zip + DOWNLOAD_NAME holoscan_endoscopy_sample_data_20230222.zip + URL_MD5 d54f84a562d29ed560a87d2607eba973 + DOWNLOAD_DIR ${HOLOHUB_DATA_DIR} + ) + add_dependencies(h264_endoscopy_tool_tracking_distributed endoscopy_data) +endif() diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/cloud_inference_fragment.hpp b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/cloud_inference_fragment.hpp new file mode 100644 index 00000000..a7b5f183 --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/cloud_inference_fragment.hpp @@ -0,0 +1,98 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CLOUD_INFERENCE_FRAGMENT_HPP +#define CLOUD_INFERENCE_FRAGMENT_HPP + +#include +#include +#include +#include + +#include "gxf_imports.hpp" + +using namespace holoscan; + +class CloudInferenceFragment : public holoscan::Fragment { + private: + std::string model_dir_; + uint32_t width_ = 0; + uint32_t height_ = 0; + + public: + CloudInferenceFragment(const std::string& model_dir, const uint32_t width, const uint32_t height) + : model_dir_(model_dir), width_(width), height_(height) {} + + void compose() override { + auto response_condition = make_condition("response_condition"); + auto video_decoder_context = make_resource( + "decoder-context", Arg("async_scheduling_term") = response_condition); + + auto request_condition = make_condition("request_condition"); + auto video_decoder_request = + make_operator("video_decoder_request", + from_config("video_decoder_request"), + request_condition, + Arg("async_scheduling_term") = request_condition, + Arg("videodecoder_context") = video_decoder_context); + + auto video_decoder_response = make_operator( + "video_decoder_response", + from_config("video_decoder_response"), + response_condition, + Arg("pool") = make_resource("pool"), + Arg("videodecoder_context") = video_decoder_context); + + auto decoder_output_format_converter = make_operator( + "decoder_output_format_converter", + from_config("decoder_output_format_converter"), + Arg("pool") = make_resource("pool")); + + auto rgb_float_format_converter = make_operator( + "rgb_float_format_converter", + from_config("rgb_float_format_converter"), + Arg("pool") = make_resource("pool")); + + const std::string model_file_path = model_dir_ + "/tool_loc_convlstm.onnx"; + const std::string engine_cache_dir = model_dir_ + "/engines"; + + auto lstm_inferer = make_operator( + "lstm_inferer", + from_config("lstm_inference"), + Arg("model_file_path", model_file_path), + Arg("engine_cache_dir", engine_cache_dir), + Arg("pool") = make_resource("pool"), + Arg("cuda_stream_pool") = make_resource("cuda_stream", 0, 0, 0, 1, 5)); + + auto tool_tracking_postprocessor = make_operator( + "tool_tracking_postprocessor", + from_config("tool_tracking_postprocessor"), + Arg("device_allocator") = make_resource("device_allocator"), + Arg("host_allocator") = make_resource("host_allocator")); + + add_operator(video_decoder_request); + add_flow(video_decoder_response, + decoder_output_format_converter, + {{"output_transmitter", "source_video"}}); + add_flow( + decoder_output_format_converter, rgb_float_format_converter, {{"tensor", "source_video"}}); + add_flow(rgb_float_format_converter, lstm_inferer); + add_flow(lstm_inferer, tool_tracking_postprocessor, {{"tensor", "in"}}); + } +}; + +#endif diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/endoscopy_tool_tracking.yaml b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/endoscopy_tool_tracking.yaml new file mode 100644 index 00000000..49b8e130 --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/endoscopy_tool_tracking.yaml @@ -0,0 +1,147 @@ +%YAML 1.2 +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +extensions: + - libgxf_videodecoder.so + - libgxf_videodecoderio.so + - libgxf_videoencoder.so + - libgxf_videoencoderio.so + - ../../../../lib/gxf_extensions/libgxf_lstm_tensor_rt_inference.so + +replayer: + basename: "surgical_video" + frame_rate: 0 # as specified in timestamps + repeat: true # default: false + realtime: true # default: true + count: 0 # default: 0 (no frame count restriction) + +bitstream_reader: + outbuf_storage_type: 0 + aud_nal_present: 0 + +video_decoder_request: + inbuf_storage_type: 1 + +video_decoder_response: + outbuf_storage_type: 1 + +decoder_output_format_converter: + in_dtype: "nv12" + out_dtype: "rgb888" + +rgb_float_format_converter: + out_tensor_name: rgb_float_format_converter_out + in_dtype: "rgb888" + out_dtype: "float32" + scale_min: 0.0 + scale_max: 255.0 + +format_converter_replayer: + out_tensor_name: source_video + out_dtype: "float32" + scale_min: 0.0 + scale_max: 255.0 + +lstm_inference: + input_tensor_names: + - rgb_float_format_converter_out + - cellstate_in + - hiddenstate_in + input_state_tensor_names: + - cellstate_in + - hiddenstate_in + input_binding_names: + - data_ph:0 # (shape=[1, 480, 854, 3], dtype=float32) <==> source_video + - cellstate_ph:0 # (shape=[1, 60, 107, 7], dtype=float32) == internal state + - hiddenstate_ph:0 # (shape=[1, 60, 107, 7], dtype=float32) == internal state + output_tensor_names: + - cellstate_out + - hiddenstate_out + - probs + - scaled_coords + - binary_masks + output_state_tensor_names: + - cellstate_out + - hiddenstate_out + output_binding_names: + - Model/net_states:0 # (shape=[ 1, 60, 107, 7], dtype=float32) + - Model/net_hidden:0 # (shape=[ 1, 60, 107, 7], dtype=float32) + - probs:0 # (shape=[1, 7], dtype=float32) + - Localize/scaled_coords:0 # (shape=[1, 7, 2], dtype=float32) + - Localize_1/binary_masks:0 # (shape=[1, 7, 60, 107], dtype=float32) + force_engine_update: false + verbose: true + max_workspace_size: 2147483648 + enable_fp16_: true + +tool_tracking_postprocessor: + +holoviz: + tensors: + - name: "" + type: color + opacity: 1.0 + priority: 0 + - name: mask + type: color + opacity: 1.0 + priority: 1 + - name: scaled_coords + type: crosses + opacity: 1.0 + line_width: 4 + color: [1.0, 0.0, 0.0, 1.0] + priority: 2 + - name: scaled_coords + type: text + opacity: 1.0 + priority: 3 + color: [1.0, 1.0, 1.0, 0.9] + text: + - Grasper + - Bipolar + - Hook + - Scissors + - Clipper + - Irrigator + - Spec.Bag + +holoviz_overlay: + headless: true + tensors: + - name: mask + type: color + opacity: 1.0 + priority: 1 + - name: scaled_coords + type: crosses + opacity: 1.0 + line_width: 4 + color: [1.0, 0.0, 0.0, 1.0] + priority: 2 + - name: scaled_coords + type: text + opacity: 1.0 + priority: 3 + color: [1.0, 1.0, 1.0, 0.9] + text: + - Grasper + - Bipolar + - Hook + - Scissors + - Clipper + - Irrigator + - Spec.Bag diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/gxf_imports.hpp b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/gxf_imports.hpp new file mode 100644 index 00000000..756159c1 --- /dev/null +++ 
b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/gxf_imports.hpp @@ -0,0 +1,99 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef H264_ENDOSCOPY_TOOL_TRACKING_DISTRIBUTED_GXF_IMPORTS_HPP +#define H264_ENDOSCOPY_TOOL_TRACKING_DISTRIBUTED_GXF_IMPORTS_HPP + +#include +#include + +// Import h.264 GXF codelets and components as Holoscan operators and resources +// Starting with Holoscan SDK v2.1.0, importing GXF codelets/components as Holoscan operators/ +// resources can be done using the HOLOSCAN_WRAP_GXF_CODELET_AS_OPERATOR and +// HOLOSCAN_WRAP_GXF_COMPONENT_AS_RESOURCE macros. This new feature allows using GXF codelets +// and components in Holoscan applications without writing custom class wrappers (for C++) and +// Python wrappers (for Python) for each GXF codelet and component. +// For the VideoEncoderRequestOp class, since it needs to override the setup() to provide custom +// parameters and override the initialize() to register custom converters, it requires a custom +// class that extends the holoscan::ops::GXFCodeletOp class. + +// The VideoDecoderResponseOp implements nvidia::gxf::VideoDecoderResponse and handles the output +// of the decoded H264 bit stream. +// Parameters: +// - pool (std::shared_ptr): Memory pool for allocating output data. +// - outbuf_storage_type (uint32_t): Output Buffer Storage(memory) type used by this allocator. +// Can be 0: kHost, 1: kDevice. +// - videodecoder_context (std::shared_ptr): Decoder context +// Handle. +HOLOSCAN_WRAP_GXF_CODELET_AS_OPERATOR(VideoDecoderResponseOp, "nvidia::gxf::VideoDecoderResponse") + +// The VideoDecoderRequestOp implements nvidia::gxf::VideoDecoderRequest and handles the input +// for the H264 bit stream decode. +// Parameters: +// - inbuf_storage_type (uint32_t): Input Buffer storage type, 0:kHost, 1:kDevice. +// - async_scheduling_term (std::shared_ptr): Asynchronous +// scheduling condition. +// - videodecoder_context (std::shared_ptr): Decoder +// context Handle. +// - codec (uint32_t): Video codec to use, 0:H264, only H264 supported. Default:0. +// - disableDPB (uint32_t): Enable low latency decode, works only for IPPP case. +// - output_format (std::string): VidOutput frame video format, nv12pl and yuv420planar are +// supported. +HOLOSCAN_WRAP_GXF_CODELET_AS_OPERATOR(VideoDecoderRequestOp, "nvidia::gxf::VideoDecoderRequest") + +// The VideoDecoderContext implements nvidia::gxf::VideoDecoderContext and holds common variables +// and underlying context. +// Parameters: +// - async_scheduling_term (std::shared_ptr): Asynchronous +// scheduling condition required to get/set event state. +HOLOSCAN_WRAP_GXF_COMPONENT_AS_RESOURCE(VideoDecoderContext, "nvidia::gxf::VideoDecoderContext") + +// The VideoReadBitstreamOp implements nvidia::gxf::VideoReadBitStream and reads h.264 video files +// from the disk at the specified input file path. 
+// Parameters: +// - input_file_path (std::string): Path to image file +// - pool (std::shared_ptr): Memory pool for allocating output data +// - outbuf_storage_type (int32_t): Output Buffer storage type, 0:kHost, 1:kDevice +HOLOSCAN_WRAP_GXF_CODELET_AS_OPERATOR(VideoReadBitstreamOp, "nvidia::gxf::VideoReadBitStream") + +// The VideoWriteBitstreamOp implements nvidia::gxf::VideoWriteBitstream and writes bit stream to +// the disk at specified output path. +// Parameters: +// - output_video_path (std::string): The file path of the output video +// - frame_width (int): The width of the output video +// - frame_height (int): The height of the output video +// - inbuf_storage_type (int): Input Buffer storage type, 0:kHost, 1:kDevice +HOLOSCAN_WRAP_GXF_CODELET_AS_OPERATOR(VideoWriteBitstreamOp, "nvidia::gxf::VideoWriteBitstream") + +// The VideoEncoderResponseOp implements nvidia::gxf::VideoEncoderResponse and handles the output +// of the encoded YUV frames. +// Parameters: +// - pool (std::shared_ptr): Memory pool for allocating output data. +// - videoencoder_context (std::shared_ptr): Encoder context +// handle. +// - outbuf_storage_type (uint32_t): Output Buffer Storage(memory) type used by this allocator. +// Can be 0: kHost, 1: kDevice. Default: 1. +HOLOSCAN_WRAP_GXF_CODELET_AS_OPERATOR(VideoEncoderResponseOp, "nvidia::gxf::VideoEncoderResponse") + +// The VideoEncoderContext implements nvidia::gxf::VideoEncoderContext and holds common variables +// and underlying context. +// Parameters: +// - async_scheduling_term (std::shared_ptr): Asynchronous +// scheduling condition required to get/set event state. +HOLOSCAN_WRAP_GXF_COMPONENT_AS_RESOURCE(VideoEncoderContext, "nvidia::gxf::VideoEncoderContext") + +#endif diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/main.cpp b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/main.cpp new file mode 100644 index 00000000..2c5f7e41 --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/main.cpp @@ -0,0 +1,126 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include + +#include "cloud_inference_fragment.hpp" +#include "video_input_fragment.hpp" +#include "viz_fragment.hpp" + +using namespace holoscan; + +class App : public holoscan::Application { + public: + void set_datapath(const std::string& path) { datapath_ = path; } + + void compose() override { + using namespace holoscan; + + auto width = 854; + auto height = 480; + + auto video_in = make_fragment("video_in", datapath_); + auto video_in_fragment = std::dynamic_pointer_cast(video_in); + auto cloud_inference = + make_fragment("inference", datapath_, width, height); + auto viz = make_fragment("viz", width, height); + + add_flow(video_in, + cloud_inference, + {{"bitstream_reader.output_transmitter", "video_decoder_request.input_frame"}}); + add_flow(video_in, viz, {{"decoder_output_format_converter.tensor", "holoviz.receivers"}}); + add_flow(cloud_inference, + viz, + {{"tool_tracking_postprocessor.out_coords", "holoviz.receivers"}, + {"tool_tracking_postprocessor.out_mask", "holoviz.receivers"}}); + } + + private: + std::string datapath_ = "data/endoscopy"; +}; + +/** Helper function to parse the command line arguments */ +bool parse_arguments(int argc, char** argv, std::string& data_path, std::string& config_path) { + static struct option long_options[] = { + {"data", required_argument, 0, 'd'}, {"config", required_argument, 0, 'c'}, {0, 0, 0, 0}}; + + while (int c = getopt_long(argc, argv, "d:c:", long_options, NULL)) { + if (c == -1 || c == '?') break; + + switch (c) { + case 'c': + config_path = optarg; + break; + case 'd': + data_path = optarg; + break; + default: + holoscan::log_error("Unhandled option '{}'", static_cast(c)); + return false; + } + } + + return true; +} + +/** Main function */ +int main(int argc, char** argv) { + // Parse the arguments + std::string config_path = ""; + std::string data_directory = ""; + if (!parse_arguments(argc, argv, data_directory, config_path)) { return 1; } + if (data_directory.empty()) { + // Get the input data environment variable + auto input_path = std::getenv("HOLOSCAN_INPUT_PATH"); + if (input_path != nullptr && input_path[0] != '\0') { + data_directory = std::string(input_path); + } else if (std::filesystem::is_directory(std::filesystem::current_path() / "data/endoscopy")) { + data_directory = std::string((std::filesystem::current_path() / "data/endoscopy").c_str()); + } else { + HOLOSCAN_LOG_ERROR( + "Input data not provided. 
Use --data or set HOLOSCAN_INPUT_PATH environment variable."); + exit(-1); + } + } + + if (config_path.empty()) { + // Get the input data environment variable + auto config_file_path = std::getenv("HOLOSCAN_CONFIG_PATH"); + if (config_file_path == nullptr || config_file_path[0] == '\0') { + auto config_file = std::filesystem::canonical(argv[0]).parent_path(); + config_path = config_file / std::filesystem::path("endoscopy_tool_tracking.yaml"); + } else { + config_path = config_file_path; + } + } + + auto app = holoscan::make_application(); + + HOLOSCAN_LOG_INFO("Using configuration file from {}", config_path); + app->config(config_path); + + HOLOSCAN_LOG_INFO("Using input data from {}", data_directory); + app->set_datapath(data_directory); + + app->run(); + + return 0; +} diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/metadata.json b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/metadata.json new file mode 100644 index 00000000..d85e4109 --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/metadata.json @@ -0,0 +1,58 @@ +{ + "application": { + "name": "H.264 Endoscopy Tool Tracking Distributed", + "authors": [ + { + "name": "Holoscan Team", + "affiliation": "NVIDIA" + } + ], + "language": "C++", + "version": "1.0", + "changelog": { + "1.0": "Initial Release" + }, + "holoscan_sdk": { + "minimum_required_version": "2.5.0", + "tested_versions": [ + "2.5.0" + ] + }, + "platforms": [ + "amd64", + "arm64" + ], + "tags": [ + "Endoscopy", + "Tracking", + "AJA", + "Video Decoding", + "Video Encoding" + ], + "ranking": 0, + "dependencies": { + "operators": [ + { + "name": "videodecoder", + "version": "1.0" + }, + { + "name": "videoencoder", + "version": "1.0" + } + ], + "data": [ + { + "name": "Holoscan Sample App Data for AI-based Endoscopy Tool Tracking", + "description": "This resource contains the convolutional LSTM model for tool tracking in laparoscopic videos by Nwoye et. al [1], and a sample surgical video.", + "url": "https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara-holoscan/resources/holoscan_endoscopy_sample_data", + "version": "20230222" + } + ] + }, + "run": { + "command": "./h264_endoscopy_tool_tracking_distributed --data /endoscopy", + "workdir": "holohub_app_bin" + } + } +} \ No newline at end of file diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/video_input_fragment.hpp b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/video_input_fragment.hpp new file mode 100644 index 00000000..8ce4404e --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/video_input_fragment.hpp @@ -0,0 +1,75 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */ + +#ifndef VIDEO_INPUT_FRAGMENT_HPP +#define VIDEO_INPUT_FRAGMENT_HPP + +#include +#include +#include + +#include "gxf_imports.hpp" + +using namespace holoscan; + +class VideoInputFragment : public holoscan::Fragment { + private: + std::string input_dir_; + + public: + explicit VideoInputFragment(const std::string& input_dir) : input_dir_(input_dir) {} + + void compose() override { + auto bitstream_reader = make_operator( + "bitstream_reader", + from_config("bitstream_reader"), + Arg("input_file_path", input_dir_ + "/surgical_video.264"), + make_condition(750), + make_condition("periodic-condition", + Arg("recess_period") = std::string("25hz")), + Arg("pool") = make_resource("pool")); + auto response_condition = make_condition("response_condition"); + auto video_decoder_context = make_resource( + "decoder-context", Arg("async_scheduling_term") = response_condition); + + auto request_condition = make_condition("request_condition"); + auto video_decoder_request = + make_operator("video_decoder_request", + from_config("video_decoder_request"), + request_condition, + Arg("async_scheduling_term") = request_condition, + Arg("videodecoder_context") = video_decoder_context); + + auto video_decoder_response = make_operator( + "video_decoder_response", + from_config("video_decoder_response"), + response_condition, + Arg("pool") = make_resource("pool"), + Arg("videodecoder_context") = video_decoder_context); + + auto decoder_output_format_converter = make_operator( + "decoder_output_format_converter", + from_config("decoder_output_format_converter"), + Arg("pool") = make_resource("pool")); + + add_flow(bitstream_reader, video_decoder_request, {{"output_transmitter", "input_frame"}}); + add_flow(video_decoder_response, + decoder_output_format_converter, + {{"output_transmitter", "source_video"}}); + } +}; +#endif diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/viz_fragment.hpp b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/viz_fragment.hpp new file mode 100644 index 00000000..e03c30a9 --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/cpp/viz_fragment.hpp @@ -0,0 +1,42 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef VIZ_FRAGMENT_HPP +#define VIZ_FRAGMENT_HPP + +#include +#include + +class VizFragment : public holoscan::Fragment { + private: + uint32_t width_ = 0; + uint32_t height_ = 0; + + public: + VizFragment(const uint32_t width, const uint32_t height) : width_(width), height_(height) {} + + void compose() override { + auto visualizer_operator = + make_operator("holoviz", + from_config("holoviz"), + Arg("width") = width_, + Arg("height") = height_); + add_operator(visualizer_operator); + } +}; + +#endif diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/python/CMakeLists.txt b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/CMakeLists.txt new file mode 100644 index 00000000..599ebd6e --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/CMakeLists.txt @@ -0,0 +1,44 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +cmake_minimum_required(VERSION 3.20) + +find_package(holoscan 2.5.0 REQUIRED CONFIG + PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install") + +# Enable the operators +add_library(h264_endoscopy_tool_tracking_distributed_python INTERFACE) +target_link_libraries(h264_endoscopy_tool_tracking_distributed_python INTERFACE holoscan::core holoscan::ops::holoviz holoscan::ops::lstm_tensor_rt_inference holoscan::ops::tensor_to_video_buffer holoscan::ops::tool_tracking_postprocessor) + +# Add testing +if(BUILD_TESTING) + add_test(NAME h264_endoscopy_tool_tracking_distributed_python_test + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/h264_endoscopy_tool_tracking_distributed.py + --config ${CMAKE_CURRENT_SOURCE_DIR}/h264_endoscopy_tool_tracking_distributed.yaml + --data ${HOLOHUB_DATA_DIR}/endoscopy + WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) + + set_property(TEST h264_endoscopy_tool_tracking_distributed_python_test PROPERTY ENVIRONMENT + "PYTHONPATH=${GXF_LIB_DIR}/../python/lib:${CMAKE_BINARY_DIR}/python/lib") + + set_tests_properties(h264_endoscopy_tool_tracking_distributed_python_test PROPERTIES + PASS_REGULAR_EXPRESSION "Deactivating Graph" + FAIL_REGULAR_EXPRESSION "[^a-z]Error;ERROR;Failed") + + # For aarch64 LD_LIBRARY_PATH needs to be set + if(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL arm64) + set_tests_properties(h264_endoscopy_tool_tracking_distributed_python_test PROPERTIES ENVIRONMENT + "LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/tegra/") + endif() +endif() diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/python/cloud_inference_fragment.py b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/cloud_inference_fragment.py new file mode 100644 index 00000000..d99c96e9 --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/cloud_inference_fragment.py @@ -0,0 +1,115 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License") +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + from holoscan.conditions import AsynchronousCondition +except ImportError as e: + raise ImportError( + "This example requires Holoscan SDK >= 2.1.0 so AsynchronousCondition is available." + ) from e + + +from gxf_imports import VideoDecoderContext, VideoDecoderRequestOp, VideoDecoderResponseOp +from holoscan.core import Fragment +from holoscan.operators import FormatConverterOp +from holoscan.resources import CudaStreamPool, UnboundedAllocator + +from holohub.lstm_tensor_rt_inference import LSTMTensorRTInferenceOp +from holohub.tool_tracking_postprocessor import ToolTrackingPostprocessorOp + + +class CloudInferenceFragment(Fragment): + def __init__( + self, + app, + name, + model_dir, + ): + super().__init__(app, name) + self.model_dir = model_dir + + def compose(self): + response_condition = AsynchronousCondition(self, name="response_condition") + video_decoder_context = VideoDecoderContext( + self, name="decoder-context", async_scheduling_term=response_condition + ) + + request_condition = AsynchronousCondition(self, name="request_condition") + video_decoder_request = VideoDecoderRequestOp( + self, + request_condition, + name="video_decoder_request", + async_scheduling_term=request_condition, + videodecoder_context=video_decoder_context, + **self.kwargs("video_decoder_request"), + ) + + video_decoder_response = VideoDecoderResponseOp( + self, + response_condition, + name="video_decoder_response", + pool=UnboundedAllocator(self, name="pool"), + videodecoder_context=video_decoder_context, + **self.kwargs("video_decoder_response"), + ) + + decoder_output_format_converter = FormatConverterOp( + self, + name="decoder_output_format_converter", + pool=UnboundedAllocator(self, name="pool"), + **self.kwargs("decoder_output_format_converter"), + ) + + rgb_float_format_converter = FormatConverterOp( + self, + name="rgb_float_format_converter", + pool=UnboundedAllocator(self, name="pool"), + **self.kwargs("rgb_float_format_converter"), + ) + + model_file_path = self.model_dir + "/tool_loc_convlstm.onnx" + engine_cache_dir = self.model_dir + "/engines" + + lstm_inferer = LSTMTensorRTInferenceOp( + self, + name="lstm_inferer", + model_file_path=model_file_path, + engine_cache_dir=engine_cache_dir, + pool=UnboundedAllocator(self, name="pool"), + cuda_stream_pool=CudaStreamPool(self, 0, 0, 0, 1, 5, name="cuda_stream"), + **self.kwargs("lstm_inference"), + ) + + tool_tracking_postprocessor = ToolTrackingPostprocessorOp( + self, + name="tool_tracking_postprocessor", + device_allocator=UnboundedAllocator(self, name="device_allocator"), + host_allocator=UnboundedAllocator(self, name="host_allocator"), + **self.kwargs("tool_tracking_postprocessor"), + ) + + self.add_operator(video_decoder_request) + self.add_flow( + video_decoder_response, + decoder_output_format_converter, + {("output_transmitter", "source_video")}, + ) + self.add_flow( + decoder_output_format_converter, + rgb_float_format_converter, + {("tensor", 
"source_video")}, + ) + self.add_flow(rgb_float_format_converter, lstm_inferer) + self.add_flow(lstm_inferer, tool_tracking_postprocessor, {("tensor", "in")}) diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/python/gxf_imports.py b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/gxf_imports.py new file mode 100644 index 00000000..9f5fb275 --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/gxf_imports.py @@ -0,0 +1,111 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License") +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from holoscan.operators import GXFCodeletOp +from holoscan.resources import GXFComponentResource + +# Import h.264 GXF codelets and components as Holoscan operators and resources +# Starting with Holoscan SDK v2.1.0, importing GXF codelets/components as Holoscan operators/ +# resources can be done by extending the GXFCodeletOp class and the GXFComponentResource class. +# This new feature allows GXF codelets and components in Holoscan applications without writing +# custom class wrappers in C++ and Python wrappers for each GXF codelet and component. + + +# The VideoDecoderResponseOp implements nvidia::gxf::VideoDecoderResponse and handles the output +# of the decoded H264 bit stream. +# Parameters: +# - pool (Allocator): Memory pool for allocating output data. +# - outbuf_storage_type (int): Output Buffer Storage(memory) type used by this allocator. +# Can be 0: kHost, 1: kDevice. +# - videodecoder_context (VideoDecoderContext): Decoder context +# Handle. +class VideoDecoderResponseOp(GXFCodeletOp): + def __init__(self, fragment, *args, **kwargs): + super().__init__(fragment, "nvidia::gxf::VideoDecoderResponse", *args, **kwargs) + + +# The VideoDecoderRequestOp implements nvidia::gxf::VideoDecoderRequest and handles the input +# for the H264 bit stream decode. +# Parameters: +# - inbuf_storage_type (int): Input Buffer storage type, 0:kHost, 1:kDevice. +# - async_scheduling_term (AsynchronousCondition): Asynchronous scheduling condition. +# - videodecoder_context (VideoDecoderContext): Decoder context Handle. +# - codec (int): Video codec to use, 0:H264, only H264 supported. Default:0. +# - disableDPB (int): Enable low latency decode, works only for IPPP case. +# - output_format (str): VidOutput frame video format, nv12pl and yuv420planar are supported. +class VideoDecoderRequestOp(GXFCodeletOp): + def __init__(self, fragment, *args, **kwargs): + super().__init__(fragment, "nvidia::gxf::VideoDecoderRequest", *args, **kwargs) + + +# The VideoDecoderContext implements nvidia::gxf::VideoDecoderContext and holds common variables +# and underlying context. +# Parameters: +# - async_scheduling_term (AsynchronousCondition): Asynchronous scheduling condition required to get/set event state. 
+class VideoDecoderContext(GXFComponentResource): + def __init__(self, fragment, *args, **kwargs): + super().__init__(fragment, "nvidia::gxf::VideoDecoderContext", *args, **kwargs) + + +# The VideoReadBitstreamOp implements nvidia::gxf::VideoReadBitStream and reads h.264 video files +# from the disk at the specified input file path. +# Parameters: +# - input_file_path (str): Path to image file +# - pool (Allocator): Memory pool for allocating output data +# - outbuf_storage_type (int): Output Buffer storage type, 0:kHost, 1:kDevice +class VideoReadBitstreamOp(GXFCodeletOp): + def __init__(self, fragment, *args, **kwargs): + super().__init__(fragment, "nvidia::gxf::VideoReadBitStream", *args, **kwargs) + + +# The VideoWriteBitstreamOp implements nvidia::gxf::VideoWriteBitstream and writes bit stream to +# the disk at specified output path. +# Parameters: +# - output_video_path (str): The file path of the output video +# - frame_width (int): The width of the output video +# - frame_height (int): The height of the output video +# - inbuf_storage_type (int): Input Buffer storage type, 0:kHost, 1:kDevice +class VideoWriteBitstreamOp(GXFCodeletOp): + def __init__(self, fragment, *args, **kwargs): + super().__init__(fragment, "nvidia::gxf::VideoWriteBitstream", *args, **kwargs) + + +# The VideoEncoderResponseOp implements nvidia::gxf::VideoEncoderResponse and handles the output +# of the encoded YUV frames. +# Parameters: +# - pool (Allocator): Memory pool for allocating output data. +# - videoencoder_context (VideoEncoderContext): Encoder context handle. +# - outbuf_storage_type (int): Output Buffer Storage(memory) type used by this allocator. +# Can be 0: kHost, 1: kDevice. Default: 1. +class VideoEncoderResponseOp(GXFCodeletOp): + def __init__(self, fragment, *args, **kwargs): + super().__init__(fragment, "nvidia::gxf::VideoEncoderResponse", *args, **kwargs) + + +# The VideoEncoderContext implements nvidia::gxf::VideoEncoderContext and holds common variables +# and underlying context. +# Parameters: +# - async_scheduling_term (AsynchronousCondition): Asynchronous scheduling condition required to get/set event state. +class VideoEncoderContext(GXFComponentResource): + def __init__(self, fragment, *args, **kwargs): + super().__init__(fragment, "nvidia::gxf::VideoEncoderContext", *args, **kwargs) + + +# The VideoEncoderRequestOp implements nvidia::gxf::VideoEncoderRequest and handles the input for +# encoding YUV frames to H264 bit stream. +# Refer to operators/video_encoder/video_encoder_request/README.md for details +class VideoEncoderRequestOp(GXFCodeletOp): + def __init__(self, fragment, *args, **kwargs): + super().__init__(fragment, "nvidia::gxf::VideoEncoderRequest", *args, **kwargs) diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/python/h264_endoscopy_tool_tracking_distributed.py b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/h264_endoscopy_tool_tracking_distributed.py new file mode 100644 index 00000000..242a067e --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/h264_endoscopy_tool_tracking_distributed.py @@ -0,0 +1,107 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License") +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +import sys +from argparse import ArgumentParser, Namespace + +from cloud_inference_fragment import CloudInferenceFragment +from holoscan.core import Application +from video_input_fragment import VideoInputFragment +from viz_fragment import VizFragment + +logger = logging.getLogger("h264_endoscopy_tool_tracking_distributed") + + +class EndoscopyApp(Application): + def __init__(self, data): + """Initialize the distributed endoscopy tool tracking application""" + super().__init__() + + # set name + self.name = "Endoscopy App" + + if (data is None) or (data == "none"): + data = os.environ.get("HOLOHUB_DATA_PATH", "../data") + + self.datapath_ = data + + def compose(self): + width = 854 + height = 480 + + video_in = VideoInputFragment(self, "video_in", self.datapath_) + cloud_inference = CloudInferenceFragment(self, "inference", self.datapath_) + viz = VizFragment(self, "viz", width, height) + + self.add_flow( + video_in, + cloud_inference, + {("bitstream_reader.output_transmitter", "video_decoder_request.input_frame")}, + ) + self.add_flow( + video_in, viz, {("decoder_output_format_converter.tensor", "holoviz.receivers")} + ) + self.add_flow( + cloud_inference, + viz, + { + ("tool_tracking_postprocessor.out_coords", "holoviz.receivers"), + ("tool_tracking_postprocessor.out_mask", "holoviz.receivers"), + }, + ) + + +def parse_args() -> Namespace: + parser = ArgumentParser(description="Distributed Endoscopy Tool Tracking Application") + parser.add_argument( + "--data", + type=str, + required=False, + default=os.environ.get("HOLOSCAN_INPUT_PATH", None), + help="Input dataset.", + ) + parser.add_argument( + "--config", + type=str, + required=False, + default=os.environ.get( + "HOLOSCAN_CONFIG_PATH", + os.path.join( + os.path.dirname(__file__), "h264_endoscopy_tool_tracking_distributed.yaml" + ), + ), + help="Path to the application configuration file.", + ) + + args, _ = parser.parse_known_args() + + return args + + +if __name__ == "__main__": + args = parse_args() + + if args.data is None: + logger.error( + "Input data not provided. Use --data or set HOLOSCAN_INPUT_PATH environment variable." + ) + sys.exit(-1) + + app = EndoscopyApp(args.data) + + app.config(args.config) + app.run() diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/python/h264_endoscopy_tool_tracking_distributed.yaml b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/h264_endoscopy_tool_tracking_distributed.yaml new file mode 100644 index 00000000..53db1742 --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/h264_endoscopy_tool_tracking_distributed.yaml @@ -0,0 +1,141 @@ +%YAML 1.2 +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +extensions: + - libgxf_videodecoder.so + - libgxf_videodecoderio.so + - libgxf_videoencoder.so + - libgxf_videoencoderio.so + - lib/gxf_extensions/libgxf_lstm_tensor_rt_inference.so + +bitstream_reader: + outbuf_storage_type: 0 + aud_nal_present: 0 + +video_decoder_request: + inbuf_storage_type: 1 + +video_decoder_response: + outbuf_storage_type: 1 + +decoder_output_format_converter: + in_dtype: "nv12" + out_dtype: "rgb888" + +rgb_float_format_converter: + out_tensor_name: rgb_float_format_converter_out + in_dtype: "rgb888" + out_dtype: "float32" + scale_min: 0.0 + scale_max: 255.0 + +lstm_inference: + input_tensor_names: + - rgb_float_format_converter_out + - cellstate_in + - hiddenstate_in + input_state_tensor_names: + - cellstate_in + - hiddenstate_in + input_binding_names: + - data_ph:0 # (shape=[1, 480, 854, 3], dtype=float32) <==> source_video + - cellstate_ph:0 # (shape=[1, 60, 107, 7], dtype=float32) == internal state + - hiddenstate_ph:0 # (shape=[1, 60, 107, 7], dtype=float32) == internal state + output_tensor_names: + - cellstate_out + - hiddenstate_out + - probs + - scaled_coords + - binary_masks + output_state_tensor_names: + - cellstate_out + - hiddenstate_out + output_binding_names: + - Model/net_states:0 # (shape=[ 1, 60, 107, 7], dtype=float32) + - Model/net_hidden:0 # (shape=[ 1, 60, 107, 7], dtype=float32) + - probs:0 # (shape=[1, 7], dtype=float32) + - Localize/scaled_coords:0 # (shape=[1, 7, 2], dtype=float32) + - Localize_1/binary_masks:0 # (shape=[1, 7, 60, 107], dtype=float32) + force_engine_update: false + verbose: true + max_workspace_size: 2147483648 + enable_fp16_: true + +tool_tracking_postprocessor: + +holoviz: + tensors: + - name: "" + type: color + opacity: 1.0 + priority: 0 + - name: mask + type: color + opacity: 1.0 + priority: 1 + - name: scaled_coords + type: crosses + opacity: 1.0 + line_width: 4 + color: [1.0, 0.0, 0.0, 1.0] + priority: 2 + - name: scaled_coords + type: text + opacity: 1.0 + priority: 3 + color: [1.0, 1.0, 1.0, 0.9] + text: + - Grasper + - Bipolar + - Hook + - Scissors + - Clipper + - Irrigator + - Spec.Bag + +video_encoder_request: + inbuf_storage_type: 1 + codec: 0 + input_width: 854 + input_height: 480 + input_format: "yuv420planar" + profile: 2 + bitrate: 20000000 + framerate: 30 + config: "pframe_cqp" + rate_control_mode: 0 + qp: 20 + iframe_interval: 5 + +video_encoder_response: + outbuf_storage_type: 1 + +bitstream_writer: + frame_width: 854 + frame_height: 480 + inbuf_storage_type: 1 + +holoviz_output_format_converter: + in_dtype: "rgba8888" + out_dtype: "rgb888" + +encoder_input_format_converter: + in_dtype: "rgb888" + out_dtype: "yuv420" + +tensor_to_video_buffer: + video_format: "yuv420" + diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/python/metadata.json b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/metadata.json new file mode 100644 index 00000000..dc9acfa9 --- /dev/null +++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/metadata.json @@ -0,0 +1,55 @@ +{ + "application": { + "name": "H.264 Endoscopy Tool Tracking Distributed", + "authors": [ 
+      {
+        "name": "Holoscan Team",
+        "affiliation": "NVIDIA"
+      }
+    ],
+    "language": "Python",
+    "version": "1.0",
+    "changelog": {
+      "1.0": "Initial Release"
+    },
+    "holoscan_sdk": {
+      "minimum_required_version": "2.5.0",
+      "tested_versions": ["2.5.0"]
+    },
+    "platforms": ["amd64", "arm64"],
+    "tags": ["Endoscopy", "Video Decoding", "Video Encoding"],
+    "ranking": 1,
+    "dependencies": {
+      "operators": [
+        {
+          "name": "videodecoder",
+          "version": "1.2.0"
+        },
+        {
+          "name": "videodecoderio",
+          "version": "1.2.0"
+        },
+        {
+          "name": "videoencoder",
+          "version": "1.2.0"
+        },
+        {
+          "name": "videoencoderio",
+          "version": "1.2.0"
+        }
+      ],
+      "data": [
+        {
+          "name": "Holoscan Sample App Data for AI-based Endoscopy Tool Tracking",
+          "description": "This resource contains the convolutional LSTM model for tool tracking in laparoscopic videos by Nwoye et al. [1], and a sample surgical video.",
+          "url": "https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara-holoscan/resources/holoscan_endoscopy_sample_data",
+          "version": "20230222"
+        }
+      ]
+    },
+    "run": {
+      "command": "python3 /h264_endoscopy_tool_tracking_distributed.py --data /endoscopy",
+      "workdir": "holohub_bin"
+    }
+  }
+}
diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/python/video_input_fragment.py b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/video_input_fragment.py
new file mode 100644
index 00000000..7898ba67
--- /dev/null
+++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/video_input_fragment.py
@@ -0,0 +1,93 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+try:
+    from holoscan.conditions import AsynchronousCondition, CountCondition, PeriodicCondition
+except ImportError as e:
+    raise ImportError(
+        "This example requires Holoscan SDK >= 2.1.0 so AsynchronousCondition is available."
+    ) from e
+
+import os
+
+from gxf_imports import (
+    VideoDecoderContext,
+    VideoDecoderRequestOp,
+    VideoDecoderResponseOp,
+    VideoReadBitstreamOp,
+)
+from holoscan.core import Fragment
+from holoscan.operators import FormatConverterOp
+from holoscan.resources import UnboundedAllocator
+
+
+class VideoInputFragment(Fragment):
+    def __init__(self, app, name, video_dir):
+        super().__init__(app, name)
+        self.video_dir = video_dir
+
+        if not os.path.exists(self.video_dir):
+            raise ValueError(f"Could not find video data: {video_dir=}")
+
+    def compose(self):
+        bitstream_reader = VideoReadBitstreamOp(
+            self,
+            CountCondition(self, 750),
+            PeriodicCondition(self, name="periodic-condition", recess_period=0.04),
+            name="bitstream_reader",
+            input_file_path=f"{self.video_dir}/surgical_video.264",
+            pool=UnboundedAllocator(self, name="pool"),
+            **self.kwargs("bitstream_reader"),
+        )
+
+        response_condition = AsynchronousCondition(self, name="response_condition")
+        video_decoder_context = VideoDecoderContext(
+            self, name="decoder-context", async_scheduling_term=response_condition
+        )
+
+        request_condition = AsynchronousCondition(self, name="request_condition")
+        video_decoder_request = VideoDecoderRequestOp(
+            self,
+            request_condition,
+            name="video_decoder_request",
+            async_scheduling_term=request_condition,
+            videodecoder_context=video_decoder_context,
+            **self.kwargs("video_decoder_request"),
+        )
+
+        video_decoder_response = VideoDecoderResponseOp(
+            self,
+            response_condition,
+            name="video_decoder_response",
+            pool=UnboundedAllocator(self, name="pool"),
+            videodecoder_context=video_decoder_context,
+            **self.kwargs("video_decoder_response"),
+        )
+
+        decoder_output_format_converter = FormatConverterOp(
+            self,
+            name="decoder_output_format_converter",
+            pool=UnboundedAllocator(self, name="pool"),
+            **self.kwargs("decoder_output_format_converter"),
+        )
+
+        self.add_flow(
+            bitstream_reader, video_decoder_request, {("output_transmitter", "input_frame")}
+        )
+        self.add_flow(
+            video_decoder_response,
+            decoder_output_format_converter,
+            {("output_transmitter", "source_video")},
+        )
diff --git a/applications/h264/h264_endoscopy_tool_tracking_distributed/python/viz_fragment.py b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/viz_fragment.py
new file mode 100644
index 00000000..63a4c497
--- /dev/null
+++ b/applications/h264/h264_endoscopy_tool_tracking_distributed/python/viz_fragment.py
@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +from holoscan.core import Fragment +from holoscan.operators import HolovizOp + + +class VizFragment(Fragment): + def __init__(self, app, name, width, height): + super().__init__(app, name) + self.width = width + self.height = height + + def compose(self): + visualizer = HolovizOp( + self, + name="holoviz", + width=self.width, + height=self.height, + **self.kwargs("holoviz"), + ) + + self.add_operator(visualizer)
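
The top-level script referenced by the metadata.json run command (h264_endoscopy_tool_tracking_distributed.py) does not appear in this part of the patch. The following is a minimal, hypothetical sketch of how the two fragments added above could be composed with Holoscan's distributed-application API; it is not the file from this patch. The inference fragment configured by the YAML (rgb_float_format_converter, lstm_inference, tool_tracking_postprocessor) is deliberately omitted rather than guessed, and the fragment names plus the "decoder_output_format_converter.tensor" / "holoviz.receivers" port pairing are assumptions based on the default FormatConverterOp and HolovizOp port names.

```python
# Hypothetical top-level application sketch (NOT part of this patch): wires the
# VideoInputFragment and VizFragment from the diff above together. The inference
# fragment is omitted rather than guessed; port names are assumptions.
import os
from argparse import ArgumentParser

from holoscan.core import Application

from video_input_fragment import VideoInputFragment
from viz_fragment import VizFragment


class EndoscopyDistributedApp(Application):
    def __init__(self, data_dir):
        super().__init__()
        self.data_dir = data_dir

    def compose(self):
        # 854x480 matches the frame size used throughout the YAML configuration above.
        width, height = 854, 480

        video_in_fragment = VideoInputFragment(self, "video_in_fragment", self.data_dir)
        viz_fragment = VizFragment(self, "viz_fragment", width, height)

        # Decoded-video path: decoder output format converter -> Holoviz. An inference
        # fragment would normally sit alongside this flow and feed the tool-tracking
        # overlays into "holoviz.receivers" as well.
        self.add_flow(
            video_in_fragment,
            viz_fragment,
            {("decoder_output_format_converter.tensor", "holoviz.receivers")},
        )


if __name__ == "__main__":
    parser = ArgumentParser(description="Distributed H.264 endoscopy tool tracking (sketch)")
    parser.add_argument(
        "--data",
        default=os.environ.get("HOLOSCAN_INPUT_PATH", "../data/endoscopy"),
        help="Path to the endoscopy sample data",
    )
    args, _ = parser.parse_known_args()

    app = EndoscopyDistributedApp(args.data)
    # app.config(...) would load the YAML shown earlier; its on-disk filename is not
    # visible in this diff, so it is left out here.
    app.run()
```

At run time, the fragments of a distributed Holoscan application can be kept in a single process or spread across machines with the SDK's standard `--driver`, `--worker`, `--address`, and `--fragments` command-line options; the sketch relies on that built-in mechanism rather than any custom launcher.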
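
The YAML earlier in this patch also configures an H.264 write-back branch (holoviz_output_format_converter, encoder_input_format_converter, tensor_to_video_buffer, video_encoder_request, video_encoder_response, bitstream_writer) that the Python VizFragment shown here never instantiates; which file wires it up is not visible in this part of the diff. Below is a minimal sketch, assuming that branch hangs off Holoviz's render-buffer output. The class name, the choice to host the branch in the visualization fragment, and the allocator usage are assumptions, and the encoder/bitstream-writer operators are only indicated in comments because their Python imports are not shown here.

```python
# Hypothetical extension of the visualization fragment (NOT part of this patch): enables
# the Holoviz render-buffer output and feeds it into the format-converter chain that the
# "holoviz_output_format_converter" / "encoder_input_format_converter" YAML sections
# configure. Downstream encoder operators are left as comments.
from holoscan.core import Fragment
from holoscan.operators import FormatConverterOp, HolovizOp
from holoscan.resources import UnboundedAllocator


class VizWithEncodeFragment(Fragment):
    def __init__(self, app, name, width, height):
        super().__init__(app, name)
        self.width = width
        self.height = height

    def compose(self):
        visualizer = HolovizOp(
            self,
            name="holoviz",
            width=self.width,
            height=self.height,
            enable_render_buffer_output=True,  # expose the rendered frame for re-encoding
            allocator=UnboundedAllocator(self, name="allocator"),
            **self.kwargs("holoviz"),
        )

        holoviz_output_format_converter = FormatConverterOp(
            self,
            name="holoviz_output_format_converter",
            pool=UnboundedAllocator(self, name="pool"),
            **self.kwargs("holoviz_output_format_converter"),  # rgba8888 -> rgb888 per YAML
        )

        encoder_input_format_converter = FormatConverterOp(
            self,
            name="encoder_input_format_converter",
            pool=UnboundedAllocator(self, name="pool"),
            **self.kwargs("encoder_input_format_converter"),  # rgb888 -> yuv420 per YAML
        )

        self.add_flow(
            visualizer,
            holoviz_output_format_converter,
            {("render_buffer_output", "source_video")},
        )
        self.add_flow(
            holoviz_output_format_converter,
            encoder_input_format_converter,
            {("tensor", "source_video")},
        )
        # ...followed by tensor_to_video_buffer -> video_encoder_request / video_encoder_response
        # -> bitstream_writer, mirroring the decode chain in video_input_fragment.py.
```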