diff --git a/.gitignore b/.gitignore index 598a012..b8d8ffb 100644 --- a/.gitignore +++ b/.gitignore @@ -53,9 +53,6 @@ _deps runtime_docker/install runtime_docker/*.txt -# file autogenerated by setuptools_scm -_version.py - # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. diff --git a/CMakeLists.txt b/CMakeLists.txt index f2bca9e..6bafc51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -194,6 +194,8 @@ list(APPEND HOLOSCAN_INSTALL_TARGETS op_inference_processor op_ping_rx op_ping_tx + op_ping_tensor_tx + op_ping_tensor_rx op_segmentation_postprocessor op_video_stream_recorder op_video_stream_replayer diff --git a/VERSION b/VERSION index 276cbf9..197c4d5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.3.0 +2.4.0 diff --git a/cmake/deps/ajantv2_rapids.cmake b/cmake/deps/ajantv2_rapids.cmake index 22ded65..721877b 100644 --- a/cmake/deps/ajantv2_rapids.cmake +++ b/cmake/deps/ajantv2_rapids.cmake @@ -19,6 +19,13 @@ include(${rapids-cmake-dir}/cpm/find.cmake) # Setting NTV2_VERSION_BUILD environment variable to avoid CMake warning set(ENV{NTV2_VERSION_BUILD} 1) +# Do not include debug information for RelWithDebInfo builds to avoid large binaries +if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + set(AJA_BUILD_TYPE "Debug") +else() + set(AJA_BUILD_TYPE "Release") +endif() + rapids_cpm_find(ajantv2 17.0.1 GLOBAL_TARGETS AJA::ajantv2 @@ -34,7 +41,7 @@ rapids_cpm_find(ajantv2 17.0.1 "AJANTV2_DISABLE_TOOLS ON" "AJA_INSTALL_HEADERS OFF" "AJA_INSTALL_SOURCES OFF" - "CMAKE_BUILD_TYPE Release" + "CMAKE_BUILD_TYPE ${AJA_BUILD_TYPE}" EXCLUDE_FROM_ALL ) diff --git a/docs/aja_setup.rst b/docs/aja_setup.rst index b298555..91e1643 100644 --- a/docs/aja_setup.rst +++ b/docs/aja_setup.rst @@ -106,6 +106,35 @@ the AJA device, so the following instructions cover this native installation. 
* :ref:`aja_testing` +Using the AJA NTV2 Driver and SDK Build Script +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Included in the `scripts` directory is the `aja_build.sh` script which can be +used to download the AJA NTV2 source, build the drivers and SDK, load the +drivers, and run the `ntv2enumerateboards` utility to list the AJA boards that +are connected to the system. To download and build the drivers and SDK, simply +run the script: + + .. code-block:: sh + + $ ./scripts/aja_build.sh + +To optionally have the script load the drivers and list the connected devices +once the build is complete, add the `--load-driver` flag: + + .. code-block:: sh + + $ ./scripts/aja_build.sh --load-driver + +.. Note:: + + The remainder of the steps in this documentation describe how to manually + build and load the AJA NTV2 drivers and SDK, and are not needed when using + the build script. However, it will still be required to reload the drivers + after rebooting the system by running the `load_ajantv2` command as described + in :ref:`aja_driver_load`. + + .. 
_aja_download: Downloading the AJA NTV2 SDK Source diff --git a/docs/api/holoscan_cpp_api.md b/docs/api/holoscan_cpp_api.md index 1ab5593..feaf133 100644 --- a/docs/api/holoscan_cpp_api.md +++ b/docs/api/holoscan_cpp_api.md @@ -111,6 +111,8 @@ - {ref}`exhale_class_classholoscan_1_1ops_1_1InferenceOp` - {ref}`exhale_class_classholoscan_1_1ops_1_1InferenceProcessorOp` - {ref}`exhale_class_classholoscan_1_1ops_1_1PingRxOp` +- {ref}`exhale_class_classholoscan_1_1ops_1_1PingTensorRxOp` +- {ref}`exhale_class_classholoscan_1_1ops_1_1PingTensorTxOp` - {ref}`exhale_class_classholoscan_1_1ops_1_1PingTxOp` - {ref}`exhale_class_classholoscan_1_1ops_1_1SegmentationPostprocessorOp` - {ref}`exhale_class_classholoscan_1_1ops_1_1V4L2VideoCaptureOp` diff --git a/docs/cli/package.md b/docs/cli/package.md index e278015..dde5f01 100755 --- a/docs/cli/package.md +++ b/docs/cli/package.md @@ -6,7 +6,7 @@ ## Synopsis -`holoscan package` [](#cli-help) [](#cli-log-level) [](#cli-package-config) [](#cli-package-docs) [](#cli-package-models) [](#cli-package-platform) [](#cli-package-platform-config) [](#cli-package-timeout) [](#cli-package-version) [](#cli-package-base-image) [](#cli-package-build-image) [](#cli-package-build-cache) [](#cli-package-cmake-args) [](#cli-package-no-cache) [](#cli-package-sdk) [](#cli-package-source) [](#cli-package-sdk-version) [](#cli-package-holoscan-sdk-file) [](#cli-package-monai-deploy-sdk-file) [](#cli-package-output) [](#cli-package-tag) [](#cli-package-username) [](#cli-package-uid) [](#cli-package-gid) [](#cli-package-application) [](#cli-package-source) +`holoscan package` [](#cli-help) [](#cli-log-level) [](#cli-package-config) [](#cli-package-docs) [](#cli-package-models) [](#cli-package-platform) [](#cli-package-platform-config) [](#cli-package-timeout) [](#cli-package-version) [](#cli-package-base-image) [](#cli-package-build-image) [](#cli-package-includes) [](#cli-package-build-cache) [](#cli-package-cmake-args) [](#cli-package-no-cache) 
[](#cli-package-sdk) [](#cli-package-source) [](#cli-package-sdk-version) [](#cli-package-holoscan-sdk-file) [](#cli-package-monai-deploy-sdk-file) [](#cli-package-output) [](#cli-package-tag) [](#cli-package-username) [](#cli-package-uid) [](#cli-package-gid) [](#cli-package-application) [](#cli-package-source) ## Examples @@ -163,6 +163,28 @@ Optionally specifies the base container image for building packaged application. Optionally specifies the build container image for building C++ applications. It must be a valid Docker image tag either accessible online or via `docker images. By default, the **Packager** picks a build image to use from [NGC](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara-holoscan/containers/holoscan). +(#cli-package-includes)= + +### `[--includes [{debug,holoviz,torch,onnx}]]` + +To reduce the size of the packaged application container, the CLI Packager, by default, includes minimum runtime dependencies to run applications designed for Holoscan. You can specify additional runtime dependencies to be included in the packaged application using this option. The following options are available: + +- `debug`: includes debugging tools, such as `gdb` +- `holoviz`: includes dependencies for Holoviz rendering on x11 and Wayland +- `torch`: includes `libtorch` and `torchvision` runtime dependencies +- `onnx`: includes `onnxruntime` runtime, `libnvinfer-plugin8`, `libnvonnxparser8` dependencies. + +:::{note} +Refer to [Developer Resources](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/DEVELOP.md#advanced-local-environment--cmake) for dependency versions. 
+::: + + + +Usage: +```bash +holoscan package --includes holoviz torch onnx +``` + (#cli-package-build-cache)= ### `[--build-cache BUILD_CACHE]` diff --git a/docs/holoscan_create_app.md b/docs/holoscan_create_app.md index d270278..dbdfa84 100644 --- a/docs/holoscan_create_app.md +++ b/docs/holoscan_create_app.md @@ -1067,15 +1067,17 @@ app->run(); ### Understanding Metadata Flow -Each operator in the workflow has an associated {cpp:class}`~holoscan::MetadataDictionary` object. At the start of each operator's {cpp:func}`~holoscan::Operator::compute` call this metadata dictionary will be empty (i.e. metadata does not persist from previous compute calls). When any call to {cpp:class}`~holoscan::InputContext::receive` data is made, any metadata also found in the input message will be merged into the operator's local metadata dictionary. The operator's compute method can then read, append to or remove metadata as explained in the next section. Whenever the operator emits data via a call to {cpp:class}`~holoscan::OutputContext::emit` the current status of the operator's metadata dictionary will be transmitted on that port alonside the data passed via the first argument to the emit call. Any downstream operators will then receive this metadata via their input ports. +Each operator in the workflow has an associated {cpp:class}`~holoscan::MetadataDictionary` object. At the start of each operator's {cpp:func}`~holoscan::Operator::compute` call this metadata dictionary will be empty (i.e. metadata does not persist from previous compute calls). When any call to {cpp:func}`~holoscan::InputContext::receive` data is made, any metadata also found in the input message will be merged into the operator's local metadata dictionary. The operator's compute method can then read, append to or remove metadata as explained in the next section. 
Whenever the operator emits data via a call to {cpp:func}`~holoscan::OutputContext::emit` the current status of the operator's metadata dictionary will be transmitted on that port alongside the data passed via the first argument to the emit call. Any downstream operators will then receive this metadata via their input ports. ### Working With Metadata from Operator::compute -Within the operator's {cpp:func}`~holoscan::Operator::compute` method, the {cpp:func}`~holoscan::Operator::metadata` method can be called to get a shared pointer to the {cpp:class}`~holoscan::MetadataDictionary` of the operator. The metadata dictionary provides a similar API to a `std::unordered_map` (C++) where the keys are strings (`std::string` for C++) and the values can store any object type (via a C++ {cpp:type}`~holoscan::MetadataObject` holding a `std::any`). Templated {cpp:func}`~holoscan::MetadataObject::get` and {cpp:func}`~holoscan::MetadataObject::set` method are provided as demonstrated below to allow directly setting values of a given type without having to explicitly work with the internal {cpp:type}`~holoscan::MetadataObject` type. +Within the operator's {cpp:func}`~holoscan::Operator::compute` method, the {cpp:func}`~holoscan::Operator::metadata` method can be called to get a shared pointer to the {cpp:class}`~holoscan::MetadataDictionary` of the operator. The metadata dictionary provides a similar API to a `std::unordered_map` (C++) or `dict` (Python) where the keys are strings (`std::string` for C++) and the values can store any object type (via a C++ {cpp:type}`~holoscan::MetadataObject` holding a `std::any`). `````{tab-set} ````{tab-item} C++ +Templated {cpp:func}`~holoscan::MetadataObject::get` and {cpp:func}`~holoscan::MetadataObject::set` methods are provided as demonstrated below to allow directly setting values of a given type without having to explicitly work with the internal {cpp:type}`~holoscan::MetadataObject` type. 
+ ```cpp // Receiving from a port updates operator metadata with any metadata found on the port @@ -1112,28 +1114,131 @@ meta->clear(); // Any emit call after this point would not transmit a metadata object op_output.emit(data, "output2"); ``` +See the {py:class}`~holoscan.core.MetadataDictionary` API docs for all available methods. ```` -````` +````{tab-item} Python +A Pythonic interface is provided for the {py:class}`~holoscan.core.MetadataObject` type. + +```python + +# Receiving from a port updates operator metadata with any metadata found on the port +input_tensors = op_input.receive("in") + +# self.metadata can be used to access the shared MetadataDictionary +# for example we can check if a key exists +has_key = "my_key" in self.metadata + +# get the number of keys +num_keys = len(self.metadata) + +# get a list of the keys +print(f"metadata keys = {self.metadata.keys()}") + +# iterate over the values in the dictionary using the `items()` method +for key, value in self.metadata.items(): + # process item + pass + +# print a Python dict of the keys/values +print(self.metadata) + +# Retrieve existing values. If the underlying value is a C++ class, a conversion to an equivalent Python object will be made (e.g. `std::vector` to `List[str]`). +name = self.metadata["patient_name"] +age = self.metadata["age"] + +# It is also supported to use the get method along with an optional default value to use +# if the key is not present. +flag = self.metadata.get("flag", False) + +# print the current metadata policy +print(f"metadata policy = {self.metadata_policy}") + +# Add a new value (if a key already exists, the value will be updated according to the +# operator's metadata_policy). If the value is set via the indexing operator as below, +# the Python object itself is stored as the value. 
+spacing = (1.0, 1.0, 3.0) +self.metadata["pixel_spacing"] = spacing + +# In some cases, if sending metadata to downstream C++-based operators, it may be desired +# to instead store the metadata value as an equivalent C++ type. In that case, it is +# necessary to instead set the value using the `set` method with `cast_to_cpp=True`. +# Automatic casting is supported for bool, str, and various numeric and iterator or +# sequence types. + +# The following would result in the spacing `Tuple[float]` being stored as a +# C++ `std::vector`. Here we show use of the `pop` method to remove a previous value +# if present. +self.metadata.pop("pixel_spacing", None) +self.metadata.set("pixel_spacing", spacing, cast_to_cpp=True) + +# To store floating point elements at a different than the default (double) precision or +# integers at a different precision than int64_t, use the dtype argument and pass a +# numpy.dtype argument corresponding to the desired C++ type. For example, the following +# would instead store `spacing` as a std::vector. In this case we show +# use of Python's `del` instead of the pop method to remove an existing item. +del self.metadata["pixel_spacing"] +self.metadata.set("pixel_spacing", spacing, dtype=np.float32, cast_to_cpp=True) + +# Remove a value +del self.metadata["patient_name"] + +# ... Some processing to produce output `data` could go here ... + +# Current state of `self.metadata` will automatically be emitted along with `data` in the call below +op_output.emit(data, "output1") + +# Can clear all items +self.metadata.clear() -See the {cpp:class}`~holoscan::MetadataDictionary` API docs for all available methods. Most of these like `begin()` and `end()` iterators and the `find()` method match the corresponding methods of `std::unordered_map`. +# Any emit call after this point would not transmit a metadata object +op_output.emit(data, "output2") +``` + +See the {py:class}`~holoscan.core.MetadataDictionary` API docs for all available methods. 
+ +The above code illustrated various ways of working with and updating an operator's metadata. + +:::{note} +Pay particular attention to the details of how metadata is set. When working with pure Python applications it is best to just use `self.metadata[key] = value` or `self.metadata.set(key, value)` to pass Python objects as the value. This will just use a shared object and not result in copies to/from corresponding C++ types. However, when interacting with other operators that wrap a C++ implementation, their `compute` method would expect C++ metadata. In that case, the `set` method with `cast_to_cpp=True` is needed to cast to the expected C++ type. This was shown in some of the "pixel_spacing" set calls in the example above. For convenience, the `value` passed to the `set` method can also be a NumPy array, but note that in this case, a copy into a new C++ std::vector is performed. The dtype of the array will be respected when creating the vector. In general, the types that can currently be cast to C++ are scalar numeric values, strings and Python Iterators or Sequences of these (the sequence will be converted to a 1D or 2D C++ std::vector so the items in the Python sequence cannot be of mixed type). +::: +```` +````` #### Metadata Update Policies -The operator class also has a {cpp:func}`~holoscan::Operator::metadata_policy` method that can be used to set a {cpp:func}`~holoscan::MetadataPolicy` to use when handling duplicate metadata keys across multiple input ports of the operator. The available options are: +`````{tab-set} +````{tab-item} C++ + +The operator class also has a {cpp:func}`~holoscan::Operator::metadata_policy` method that can be used to set a {cpp:enum}`~holoscan::MetadataPolicy` to use when handling duplicate metadata keys across multiple input ports of the operator. The available options are: - "update" (`MetadataPolicy::kUpdate`): replace any existing key from a prior `receive` call with one present in a subsequent `receive` call. 
- "reject" (`MetadataPolicy::kReject`): Reject the new key/value pair when a key already exists due to a prior `receive` call. - "raise" (`MetadataPolicy::kRaise`): Throw a `std::runtime_error` if a duplicate key is encountered. This is the default policy. The metadata policy would typically be set during {cpp:func}`~holoscan::Application::compose` as in the following example: -`````{tab-set} -````{tab-item} C++ ```cpp // Example for setting metadata policy from Application::compose() my_op = make_operator("my_op"); my_op->metadata_policy(holoscan::MetadataPolicy::kRaise); +``` +```` +````{tab-item} Python + +The operator class also has a {py:func}`~holoscan.core.Operator.metadata_policy` property that can be used to set a {py:class}`~holoscan.core.MetadataPolicy` to use when handling duplicate metadata keys across multiple input ports of the operator. The available options are: +- "update" (`MetadataPolicy.UPDATE`): replace any existing key from a prior `receive` call with one present in a subsequent `receive` call. This is the default policy. +- "reject" (`MetadataPolicy.REJECT`): Reject the new key/value pair when a key already exists due to a prior `receive` call. +- "raise" (`MetadataPolicy.RAISE`): Throw an exception if a duplicate key is encountered. + +The metadata policy would typically be set during {py:func}`~holoscan.core.Application.compose` as in the following example: + +```python + +# Example for setting metadata policy from Application.compose() +my_op = MyOperator(self, name="my_op") +my_op.metadata_policy = holoscan.MetadataPolicy.RAISE + ``` ```` ````` @@ -1143,7 +1248,7 @@ The policy only applies to the operator on which it was set. Sending metadata between two fragments of a distributed application is supported, but there are a couple of aspects to be aware of. -1. Sending metadata over the network requires serialization and deserialization of the metadata keys and values. 
The value types supported for this are the same as for data emitted over output ports (see the table in the section on {ref}`object serialization`). The only exception is that {cpp:class}`~holoscan::Tensor` and {cpp:func}`~holoscan::TensorMap` values cannot be sent as metadata values between fragments. Any {ref}`custom codecs` registered for the SDK will automatically also be available for serialization of metadata values. +1. Sending metadata over the network requires serialization and deserialization of the metadata keys and values. The value types supported for this are the same as for data emitted over output ports (see the table in the section on {ref}`object serialization`). The only exception is that {cpp:class}`~holoscan::Tensor` and {cpp:class}`~holoscan::TensorMap` values cannot be sent as metadata values between fragments (this restriction also applies to tensor-like Python objects). Any {ref}`custom codecs` registered for the SDK will automatically also be available for serialization of metadata values. 2. There is a practical size limit of several kilobytes in the amount of metadata that can be transmitted between fragments. This is because metadata is currently sent along with other entity header information in the UCX header, which has fixed size limit (the metadata is stored along with other header information within the size limit defined by the `HOLOSCAN_UCX_SERIALIZATION_BUFFER_SIZE` {ref}`environment variable`). The above restrictions only apply to metadata sent **between** fragments. Within a fragment there is no size limit on metadata (aside from system memory limits) and no serialization or deserialization step is needed. diff --git a/docs/holoscan_create_distributed_app.md b/docs/holoscan_create_distributed_app.md index 98cb2de..1db5f34 100644 --- a/docs/holoscan_create_distributed_app.md +++ b/docs/holoscan_create_distributed_app.md @@ -206,7 +206,7 @@ The following are known limitations of the distributed application support in th #### 1. 
A connection error message is displayed even when the distributed application is running correctly. -The message `Connection dropped with status -25 (Connection reset by remote peer)` appears in the console even when the application is functioning properly. This is a known issue and will be addressed in future updates, ensuring that this message will only be displayed in the event of an actual connection error. +The message `Connection dropped with status -25 (Connection reset by remote peer)` appears in the console even when the application is functioning properly. This is a known issue and will be addressed in future updates, ensuring that this message will only be displayed in the event of an actual connection error. It currently is printed once some fragments complete their work and start shutdown. Any connections from those fragments to ones that remain open are disconnected at that point, resulting in the logged message. #### 2. GPU tensors can only currently be sent/received by UCX from a single device on a given node. diff --git a/docs/holoscan_create_operator.md b/docs/holoscan_create_operator.md index 3631374..016de3e 100644 --- a/docs/holoscan_create_operator.md +++ b/docs/holoscan_create_operator.md @@ -53,6 +53,10 @@ Typically, the `start()` and the `stop()` functions are only called once during compute -> stop ``` +:::{warning} +If Python bindings are going to be created for this C++ operator, it is recommended to put any cleanup of resources allocated in the `initialize()` and/or `start()` methods into the `stop()` method of the operator and **not** in its destructor. This is necessary as a workaround to a current issue where it is not guaranteed that the destructor always gets called prior to Python application termination. The `stop()` method will always be explicitly called, so we can be assured that any cleanup happens as expected. +::: + We can override the default behavior of the operator by implementing the above methods. 
The following example shows how to implement a custom operator that overrides start, stop and compute methods. @@ -256,6 +260,42 @@ standpoint to transmit a shared pointer to the object rather than making a copy. pointers are used and the same tensor is sent to more than one downstream operator, one should avoid in-place operations on the tensor or race conditions between operators may occur. + +If you need to configure arguments or perform other setup tasks before or after the operator is initialized, you can override the `initialize()` method. This method is called once before the `start()` method. + +Example: + +```cpp + void initialize() override { + // Register custom type and codec for serialization + register_converter>(); + register_codec>("std::vector", true); + + // Set up prerequisite parameters before calling Operator::initialize() + auto frag = fragment(); + + // Check if an argument for 'allocator' exists + auto has_allocator = std::find_if( + args().begin(), args().end(), [](const auto& arg) { return (arg.name() == "allocator"); }); + // Create the allocator if no argument is provided + if (has_allocator == args().end()) { + allocator_ = frag->make_resource("allocator"); + add_arg(allocator_.get()); + } + + // Call the parent class's initialize() method to complete the initialization. + // Operator::initialize must occur after all arguments have been added. + Operator::initialize(); + + // After Operator::initialize(), the operator is ready for use and the parameters are set + int multiplier = multiplier_; + HOLOSCAN_LOG_INFO("Multiplier: {}", multiplier); + } +``` + +For details on the `register_converter()` and `register_codec()` methods, refer to {cpp:func}`holoscan::Operator::register_converter` for the custom parameter type and the section on {ref}`object serialization` for distributed applications. 
+ + (specifying-operator-parameters-cpp)= #### Specifying operator parameters (C++) @@ -631,7 +671,7 @@ spec: ::: -##### Configuring input port queue size and message batch condition +##### Configuring input port queue size and message batch condition (C++) If you want to receive multiple objects on a port and process them in batches, you can increase the queue size of the input port and set the `min_size` parameter of the `MessageAvailableCondition` condition to the desired batch size. This can be done by calling the `connector()` and `condition()` methods with the desired arguments, using the batch size as the `capacity` and `min_size` parameters, respectively. @@ -1242,10 +1282,21 @@ class MyOp(Operator): ``` #### `setup()` method vs `initialize()` vs `__init__()` -The {py:class}`~holoscan.core.Operator.setup` method aims to get the "operator's spec" by providing {py:class}`~holoscan.core.OperatorSpec` object as a spec param. When {py:class}`~holoscan.core.Operator.__init__` is called, it calls C++'s {cpp:func}`Operator::spec ` method (and also sets {py:class}`self.spec ` class member), and calls {py:class}`setup ` method so that Operator's {py:class}`~holoscan.core.Operator.spec` property holds the operator's specification. (See the [source code](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/python/holoscan/core/__init__.py#:~:text=class%20Operator) for more details.) +The {py:meth}`~holoscan.core.Operator.setup` method aims to get the "operator's spec" by providing a {py:class}`~holoscan.core.OperatorSpec` object as a spec param. When {py:meth}`~holoscan.core.Operator.__init__` is called, it calls C++'s {cpp:func}`Operator::spec ` method (and also sets the {py:attr}`self.spec ` class member) and calls the {py:meth}`setup ` method so that the Operator's {py:attr}`~holoscan.core.Operator.spec` property holds the operator's specification. 
(See the [source code](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/python/holoscan/core/__init__.py#:~:text=class%20Operator) for more details.) + + +Since the {py:meth}`~holoscan.core.Operator.setup` method can be called multiple times with other {py:class}`~holoscan.core.OperatorSpec` objects (e.g., to enumerate the operator's description), in the {py:meth}`~holoscan.core.Operator.setup` method, a user shouldn't initialize something. +Such initialization needs to be done by overriding the {py:meth}`~holoscan.core.Operator.initialize` method. + +```python + def initialize(self): + pass +``` -Since the {py:class}`~holoscan.core.Operator.setup` method can be called multiple times with other {py:class}`~holoscan.core.OperatorSpec` object (e.g., to enumerate the operator's description), in the {py:class}`~holoscan.core.Operator.setup` method, a user shouldn't initialize something in the {py:class}`~holoscan.core.Operator` object. Such initialization needs to be done in {py:class}`~holoscan.core.Operator.initialize` method. The {py:class}`~holoscan.core.Operator.__init__` method is for creating the Operator object and it can be used for initializing the operator object itself by passing miscellaneous arguments. Still, it doesn't 'initialize' the corresponding GXF entity object. +The {py:meth}`~holoscan.core.Operator.__init__` method is for creating the Operator object and can be used to initialize the operator object itself by passing various arguments. +Note that it doesn't initialize the corresponding GXF entity object. The underlying GXF entity object is initialized when the operator is scheduled for execution. +Please do not forget to call the base class constructor (`super().__init__(fragment, *args, **kwargs)`) at the end of the `__init__` method. 
#### Creating a custom operator (Python) @@ -1459,7 +1510,7 @@ def __init__(self, fragment, *args, width=4, unit_area=False, **kwargs): ``` :::{note} -As an alternative closer to C++, these parameters can be added through the {py:class}`~holoscan.core.OperatorSpec` attribute of the operator in its {py:func}`~holoscan.core.Operator.setup` method, where an associated string key must be provided as well as a default value: +As an alternative closer to C++, these parameters can be added through the {py:class}`~holoscan.core.OperatorSpec` attribute of the operator in its {py:func}`~holoscan.core.Operator.setup` method, where an associated string key must be provided as well as a default value. ```py def setup(self, spec: OperatorSpec): @@ -1467,6 +1518,8 @@ def setup(self, spec: OperatorSpec): spec.param("unit_area", False) ``` +The parameters can then be accessed on the `self` object in the operator's methods (including `initialize()`, `start()`, `compute()`, `stop()`) as `self.width` and `self.unit_area`. + Other `kwargs` properties can also be passed to `spec.param`, such as `headline`, `description` (used by GXF applications), or `kind` (used when {ref}`retrieving-any-number-of-inputs-python`, which is deprecated since v2.3.0). ::: @@ -1672,7 +1725,7 @@ spec: ::: -##### Configuring input port queue size and message batch condition +##### Configuring input port queue size and message batch condition (Python) If you want to receive multiple objects on a port and process them in batches, you can increase the queue size of the input port and set the `min_size` parameter of the `MessageAvailableCondition` condition to the desired batch size. This can be done by calling the `connector()` and `condition()` methods with the desired arguments, using the batch size as the `capacity` and `min_size` parameters, respectively. 
@@ -1901,11 +1954,17 @@ This section complements the information above on basic input and output port co By default, both the input and output ports of an Operator will use a double-buffered queue that has a capacity of one message and a policy that is set to error if a message arrives while the queue is already full. A single `MessageAvailableCondition` ({cpp:class}`C++ `/{py:class}`Python `)) condition is automatically placed on the operator for each input port so that the `compute` method will not be called until a single message is available at each port. Similarly each output port has a `DownstreamMessageAffordableCondition` ({cpp:class}`C++ `/{py:class}`Python `) condition that does not let the operator call `compute` until any operators connected downstream have space in their receiver queue for a single message. These default conditions ensure that messages never arrive at a queue when it is already full and that a message has already been received whenever the `compute` method is called. These default conditions make it relatively easy to connect a pipeline where each operator calls compute in turn, but may not be suitable for all applications. This section covers how the default behavior can be overridden on request. +It is possible to modify the global default queue policy via the `HOLOSCAN_QUEUE_POLICY` environment variable. Valid options (case insensitive) are: + - "pop": a new item that arrives when the queue is full replaces the oldest item + - "reject": a new item that arrives when the queue is full is discarded + - "fail": terminate the application if a new item arrives when the queue is full + +The default behavior is "fail" when `HOLOSCAN_QUEUE_POLICY` is not specified. If an operator's `setup` method explicitly sets a receiver or transmitter via the `connector` ({cpp:func}`C++ `/{py:func}`Python `) method as described below, that connector's policy will not be overridden by the default.
+ :::{note} Overriding operator port properties is an advanced topic. Developers may want to skip this section until they come across a case where the default behavior is not sufficient for their application. ::: - To override the properties of the queue used for a given port, the `connector` ({cpp:func}`C++ `/{py:func}`Python `) method can be used as shown in the example below. This example also shows how the `condition` ({cpp:func}`C++ `/{py:func}`Python `) method can be used to change the condition type placed on the Operator by a port. In general, when an operator has multiple conditions, they are AND combined, so the conditions on **all** ports must be satisfied before an operator can call `compute`. diff --git a/docs/holoscan_create_operator_python_bindings.md b/docs/holoscan_create_operator_python_bindings.md index 191d1a3..7c17fc5 100644 --- a/docs/holoscan_create_operator_python_bindings.md +++ b/docs/holoscan_create_operator_python_bindings.md @@ -16,6 +16,10 @@ There are several examples of bindings on Holohub in the [operators folder](http There are also several [examples of bindings](https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/python/holoscan/operators) for the built-in operators of the SDK. Unlike on Holohub, for the SDK, the corresponding C++ [headers](https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/include/holoscan/operators) and [sources](https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/src/operators) of an operator are stored under separate directory trees. ::: +:::{warning} +It is recommended to put any cleanup of resources allocated in the C++ operator's `initialize()` and/or `start()` methods into the `stop()` method of the operator and **not** in its destructor. This is necessary as a workaround to a current issue where it is not guaranteed that the destructor always gets called prior to Python application termination. 
The `stop()` method will always be explicitly called, so we can be assured that any cleanup happens as expected. +::: + (pybind11-operator-tutorial)= ## Tutorial: binding the ToolTrackingPostprocessorOp class diff --git a/docs/holoscan_create_operator_via_decorator.md b/docs/holoscan_create_operator_via_decorator.md index 278c5b5..0e474b3 100644 --- a/docs/holoscan_create_operator_via_decorator.md +++ b/docs/holoscan_create_operator_via_decorator.md @@ -150,7 +150,7 @@ When specifying the `inputs` and `outputs` arguments to `create_op`, please make ## Interoperability with wrapped C++ operators -There SDK includes a [python_decorator example](https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/examples/python_decorator) showing interoperability of wrapped C++ operators (`VideoStreamReplayerOp` and `HolovizOp`) alongside native Python operators created via the `create_op` decorator. +The SDK includes a [python_decorator example](https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/examples/python_decorator) showing interoperability of wrapped C++ operators (`VideoStreamReplayerOp` and `HolovizOp`) alongside native Python operators created via the `create_op` decorator. The start of this application imports a couple of the built-in C++-based operators with Python bindings (`HolovizOp` and `VideoStreamReplayerOp`). In addition to these, two new operators are created via the `create_op` decorator APIs. diff --git a/docs/holoscan_debugging.md b/docs/holoscan_debugging.md index ed69308..459984e 100644 --- a/docs/holoscan_debugging.md +++ b/docs/holoscan_debugging.md @@ -220,7 +220,7 @@ In cases where a distributed application using the UCX library encounters a segm While the default action is to print a backtrace on a segmentation fault, it may not always be helpful.
-For instance, if a segmentation fault is intentionally caused at line 129 in `examples/ping_distributed/cpp/ping_distributed_ops.cpp` (by adding `*(int*)0 = 0;`), running `./examples/ping_distributed/cpp/ping_distributed` will result in the following output: +For instance, if a segmentation fault is intentionally caused at line 139 near the start of `PingTensorTxOp::compute` in `/workspace/holoscan-sdk/src/operators/ping_tensor_tx/ping_tensor_tx.cpp` (by adding `*(int*)0 = 0;`), running `./examples/ping_distributed/cpp/ping_distributed` will result in the following output: ```bash @@ -252,88 +252,89 @@ By setting the `UCX_HANDLE_ERRORS` environment variable to `freeze,bt` and runni $ UCX_HANDLE_ERRORS=freeze,bt ./examples/ping_distributed/cpp/ping_distributed -[holoscan:2127091:0:2127105] Caught signal 11 (Segmentation fault: address not mapped to object at address (nil)) -==== backtrace (tid:2127105) ==== - 0 /opt/ucx/1.15.0/lib/libucs.so.0(ucs_handle_error+0x2e4) [0x7f9995850264] - 1 /opt/ucx/1.15.0/lib/libucs.so.0(+0x3045f) [0x7f999585045f] - 2 /opt/ucx/1.15.0/lib/libucs.so.0(+0x30746) [0x7f9995850746] - 3 /usr/lib/x86_64-linux-gnu/libc.so.6(+0x42520) [0x7f99949ee520] - 4 ./examples/ping_distributed/cpp/ping_distributed(+0x103d2b) [0x55971617fd2b] - 5 /workspace/holoscan-sdk/build-debug-x86_64/lib/libholoscan_core.so.1(_ZN8holoscan3gxf10GXFWrapper4tickEv+0x13d) [0x7f9996bfaafd] - 6 /workspace/holoscan-sdk/build-debug-x86_64/lib/libgxf_core.so(_ZN6nvidia3gxf14EntityExecutor10EntityItem11tickCodeletERKNS0_6HandleINS0_7CodeletEEE+0x127) [0x7f99952cb487] - 7 /workspace/holoscan-sdk/build-debug-x86_64/lib/libgxf_core.so(_ZN6nvidia3gxf14EntityExecutor10EntityItem4tickElPNS0_6RouterE+0x444) [0x7f99952cde44] - 8 /workspace/holoscan-sdk/build-debug-x86_64/lib/libgxf_core.so(_ZN6nvidia3gxf14EntityExecutor10EntityItem7executeElPNS0_6RouterERl+0x3e9) [0x7f99952ce859] - 9 
/workspace/holoscan-sdk/build-debug-x86_64/lib/libgxf_core.so(_ZN6nvidia3gxf14EntityExecutor13executeEntityEll+0x41b) [0x7f99952cf0cb] -10 /workspace/holoscan-sdk/build-debug-x86_64/lib/libgxf_serialization.so(_ZN6nvidia3gxf20MultiThreadScheduler20workerThreadEntranceEPNS0_10ThreadPoolEl+0x3c0) [0x7f9994f0cc50] -11 /usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0xdc253) [0x7f9994cb0253] -12 /usr/lib/x86_64-linux-gnu/libc.so.6(+0x94ac3) [0x7f9994a40ac3] -13 /usr/lib/x86_64-linux-gnu/libc.so.6(+0x126660) [0x7f9994ad2660] +[holoscan:37 :1:51] Caught signal 11 (Segmentation fault: address not mapped to object at address (nil)) +==== backtrace (tid: 51) ==== + 0 /opt/ucx/1.15.0/lib/libucs.so.0(ucs_handle_error+0x2e4) [0x7f9fc6d75264] + 1 /opt/ucx/1.15.0/lib/libucs.so.0(+0x3045f) [0x7f9fc6d7545f] + 2 /opt/ucx/1.15.0/lib/libucs.so.0(+0x30746) [0x7f9fc6d75746] + 3 /usr/lib/x86_64-linux-gnu/libc.so.6(+0x42520) [0x7f9fc803e520] + 4 /workspace/holoscan-sdk/build-x86_64/lib/libholoscan_op_ping_tensor_tx.so.2(_ZN8holoscan3ops14PingTensorTxOp7computeERNS_12InputContextERNS_13OutputContextERNS_16ExecutionContextE+0x53) [0x7f9fcad9e7f1] + 5 /workspace/holoscan-sdk/build-x86_64/lib/libholoscan_core.so.2(_ZN8holoscan3gxf10GXFWrapper4tickEv+0x155) [0x7f9fc9e415eb] + 6 /workspace/holoscan-sdk/build-x86_64/lib/libgxf_sample.so(_ZN6nvidia3gxf14EntityExecutor10EntityItem11tickCodeletERKNS0_6HandleINS0_7CodeletEEE+0x1a7) [0x7f9fc88f0347] + 7 /workspace/holoscan-sdk/build-x86_64/lib/libgxf_sample.so(_ZN6nvidia3gxf14EntityExecutor10EntityItem4tickElPNS0_6RouterE+0x460) [0x7f9fc88f29c0] + 8 /workspace/holoscan-sdk/build-x86_64/lib/libgxf_sample.so(_ZN6nvidia3gxf14EntityExecutor10EntityItem7executeElPNS0_6RouterERl+0x31e) [0x7f9fc88f31ee] + 9 /workspace/holoscan-sdk/build-x86_64/lib/libgxf_sample.so(_ZN6nvidia3gxf14EntityExecutor13executeEntityEll+0x2e7) [0x7f9fc88f39d7] +10 
/workspace/holoscan-sdk/build-x86_64/lib/libgxf_serialization.so(_ZN6nvidia3gxf20MultiThreadScheduler20workerThreadEntranceEPNS0_10ThreadPoolEl+0x419) [0x7f9fc8605dd9] +11 /usr/lib/x86_64-linux-gnu/libstdc++.so.6(+0xdc253) [0x7f9fc8321253] +12 /usr/lib/x86_64-linux-gnu/libc.so.6(+0x94ac3) [0x7f9fc8090ac3] +13 /usr/lib/x86_64-linux-gnu/libc.so.6(clone+0x44) [0x7f9fc8121a04] ================================= [holoscan:2127091:0:2127105] Process frozen, press Enter to attach a debugger... ``` -It is observed that the thread responsible for the segmentation fault is 2127105 (`tid:2127105`). To attach a debugger to this thread, simply press Enter. +It is observed that the thread responsible for the segmentation fault is 51 (`tid: 51`). To attach a debugger to this thread, simply press Enter. Upon attaching the debugger, a backtrace will be displayed, but it may not be from the thread that triggered the segmentation fault. To handle this, use the `info threads` command to list all threads, and the `thread ` command to switch to the thread that caused the segmentation fault. 
```bash (gdb) info threads - Id Target Id Frame -* 1 Thread 0x7f9997b36000 (LWP 2127091) "ping_distribute" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 2 Thread 0x7f9992731000 (LWP 2127093) "ping_distribute" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 3 Thread 0x7f9991f30000 (LWP 2127094) "ping_distribute" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 4 Thread 0x7f999172f000 (LWP 2127095) "ping_distribute" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 5 Thread 0x7f99909ec000 (LWP 2127096) "cuda-EvtHandlr" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 6 Thread 0x7f99891ff000 (LWP 2127097) "async" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 7 Thread 0x7f997d7cd000 (LWP 2127098) "ping_distribute" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 8 Thread 0x7f997cfcc000 (LWP 2127099) "ping_distribute" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 9 Thread 0x7f995ffff000 (LWP 2127100) "ping_distribute" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 10 Thread 0x7f99577fe000 (LWP 2127101) "ping_distribute" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 11 Thread 0x7f995f3e5000 (LWP 2127103) "ping_distribute" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 12 Thread 0x7f995ebe4000 (LWP 2127104) "ping_distribute" 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 - 13 Thread 0x7f995e3e3000 (LWP 2127105) "ping_distribute" 0x00007f9994a9642f in __GI___wait4 (pid=pid@entry=2127631, stat_loc=stat_loc@entry=0x7f995e3ddd3c, options=options@entry=0, usage=usage@entry=0x0) at ../sysdeps/unix/sysv/linux/wait4.c:30 -``` - -It's evident that thread ID 13 is responsible for the 
segmentation fault (`LWP 2127105`). To investigate further, we can switch to this thread using the command `thread 13` in GDB: + Id Target Id Frame +* 1 Thread 0x7f9fc6ce2000 (LWP 37) "ping_distribute" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 2 Thread 0x7f9fc51bb000 (LWP 39) "ping_distribute" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 3 Thread 0x7f9fc11ba000 (LWP 40) "ping_distribute" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 4 Thread 0x7f9fbd1b9000 (LWP 41) "ping_distribute" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 5 Thread 0x7f9fabfff000 (LWP 42) "cuda00001400006" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 6 Thread 0x7f9f99fff000 (LWP 43) "async" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 7 Thread 0x7f9f95ffe000 (LWP 44) "ping_distribute" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 8 Thread 0x7f9f77fff000 (LWP 45) "dispatcher" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 9 Thread 0x7f9f73ffe000 (LWP 46) "async" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 10 Thread 0x7f9f6fffd000 (LWP 47) "worker" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 11 Thread 0x7f9f5bfff000 (LWP 48) "ping_distribute" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 12 Thread 0x7f9f57ffe000 (LWP 49) "dispatcher" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 13 Thread 0x7f9f53ffd000 (LWP 50) "async" 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 + 14 Thread 0x7f9f4fffc000 (LWP 51) "worker" 0x00007f9fc80e642f in __GI___wait4 (pid=pid@entry=52, stat_loc=stat_loc@entry=0x7f9f4fff6cfc, options=options@entry=0, 
usage=usage@entry=0x0) at ../sysdeps/unix/sysv/linux/wait4.c:30 +``` + +It's evident that thread ID 14 is responsible for the segmentation fault (`LWP 51`). To investigate further, we can switch to this thread using the command `thread 14` in GDB: ```bash -(gdb) thread 13 +(gdb) thread 14 ``` After switching, we can employ the `bt` command to examine the backtrace of this thread. ```bash (gdb) bt -#0 0x00007f9994a9642f in __GI___wait4 (pid=pid@entry=2127631, stat_loc=stat_loc@entry=0x7f995e3ddd3c, options=options@entry=0, usage=usage@entry=0x0) at ../sysdeps/unix/sysv/linux/wait4.c:30 -#1 0x00007f9994a963ab in __GI___waitpid (pid=pid@entry=2127631, stat_loc=stat_loc@entry=0x7f995e3ddd3c, options=options@entry=0) at ./posix/waitpid.c:38 -#2 0x00007f999584d587 in ucs_debugger_attach () at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:816 -#3 0x00007f999585031d in ucs_error_freeze (message=0x7f999586ec53 "address not mapped to object") at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:919 -#4 ucs_handle_error (message=0x7f999586ec53 "address not mapped to object") at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:1089 -#5 ucs_handle_error (message=0x7f999586ec53 "address not mapped to object") at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:1077 -#6 0x00007f999585045f in ucs_debug_handle_error_signal (signo=signo@entry=11, cause=0x7f999586ec53 "address not mapped to object", fmt=fmt@entry=0x7f999586ecf5 " at address %p") at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:1038 -#7 0x00007f9995850746 in ucs_error_signal_handler (signo=11, info=0x7f995e3de3f0, context=) at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:1060 +#0 0x00007f9fc80e642f in __GI___wait4 (pid=pid@entry=52, stat_loc=stat_loc@entry=0x7f9f4fff6cfc, options=options@entry=0, usage=usage@entry=0x0) at ../sysdeps/unix/sysv/linux/wait4.c:30 +#1 0x00007f9fc80e63ab in __GI___waitpid (pid=pid@entry=52, stat_loc=stat_loc@entry=0x7f9f4fff6cfc, options=options@entry=0) at ./posix/waitpid.c:38 +#2 
0x00007f9fc6d72587 in ucs_debugger_attach () at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:816 +#3 0x00007f9fc6d7531d in ucs_error_freeze (message=0x7f9fc6d93c53 "address not mapped to object") at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:919 +#4 ucs_handle_error (message=0x7f9fc6d93c53 "address not mapped to object") at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:1089 +#5 ucs_handle_error (message=0x7f9fc6d93c53 "address not mapped to object") at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:1077 +#6 0x00007f9fc6d7545f in ucs_debug_handle_error_signal (signo=signo@entry=11, cause=0x7f9fc6d93c53 "address not mapped to object", fmt=fmt@entry=0x7f9fc6d93cf5 " at address %p") at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:1038 +#7 0x00007f9fc6d75746 in ucs_error_signal_handler (signo=11, info=0x7f9f4fff73b0, context=) at /opt/ucx/src/contrib/../src/ucs/debug/debug.c:1060 #8 -#9 holoscan::ops::PingTensorTxOp::compute (this=0x559716f26fa0, op_output=..., context=...) at ../examples/ping_distributed/cpp/ping_distributed_ops.cpp:129 -#10 0x00007f9996bfaafd in holoscan::gxf::GXFWrapper::tick (this=0x559716f6f740) at ../src/core/gxf/gxf_wrapper.cpp:66 -#11 0x00007f99952cb487 in nvidia::gxf::EntityExecutor::EntityItem::tickCodelet(nvidia::gxf::Handle const&) () from /workspace/holoscan-sdk/build-debug-x86_64/lib/libgxf_core.so -#12 0x00007f99952cde44 in nvidia::gxf::EntityExecutor::EntityItem::tick(long, nvidia::gxf::Router*) () from /workspace/holoscan-sdk/build-debug-x86_64/lib/libgxf_core.so -#13 0x00007f99952ce859 in nvidia::gxf::EntityExecutor::EntityItem::execute(long, nvidia::gxf::Router*, long&) () from /workspace/holoscan-sdk/build-debug-x86_64/lib/libgxf_core.so -#14 0x00007f99952cf0cb in nvidia::gxf::EntityExecutor::executeEntity(long, long) () from /workspace/holoscan-sdk/build-debug-x86_64/lib/libgxf_core.so -#15 0x00007f9994f0cc50 in nvidia::gxf::MultiThreadScheduler::workerThreadEntrance(nvidia::gxf::ThreadPool*, long) () from 
/workspace/holoscan-sdk/build-debug-x86_64/lib/libgxf_serialization.so -#16 0x00007f9994cb0253 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6 -#17 0x00007f9994a40ac3 in start_thread (arg=) at ./nptl/pthread_create.c:442 -#18 0x00007f9994ad2660 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 +#9 holoscan::ops::PingTensorTxOp::compute (this=0x5643fdcbd540, op_output=..., context=...) at /workspace/holoscan-sdk/src/operators/ping_tensor_tx/ping_tensor_tx.cpp:139 +#10 0x00007f9fc9e415eb in holoscan::gxf::GXFWrapper::tick (this=0x5643fdcfef00) at /workspace/holoscan-sdk/src/core/gxf/gxf_wrapper.cpp:78 +#11 0x00007f9fc88f0347 in nvidia::gxf::EntityExecutor::EntityItem::tickCodelet(nvidia::gxf::Handle const&) () from /workspace/holoscan-sdk/build-x86_64/lib/libgxf_sample.so +#12 0x00007f9fc88f29c0 in nvidia::gxf::EntityExecutor::EntityItem::tick(long, nvidia::gxf::Router*) () from /workspace/holoscan-sdk/build-x86_64/lib/libgxf_sample.so +#13 0x00007f9fc88f31ee in nvidia::gxf::EntityExecutor::EntityItem::execute(long, nvidia::gxf::Router*, long&) () from /workspace/holoscan-sdk/build-x86_64/lib/libgxf_sample.so +#14 0x00007f9fc88f39d7 in nvidia::gxf::EntityExecutor::executeEntity(long, long) () from /workspace/holoscan-sdk/build-x86_64/lib/libgxf_sample.so +#15 0x00007f9fc8605dd9 in nvidia::gxf::MultiThreadScheduler::workerThreadEntrance(nvidia::gxf::ThreadPool*, long) () from /workspace/holoscan-sdk/build-x86_64/lib/libgxf_serialization.so +#16 0x00007f9fc8321253 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6 +#17 0x00007f9fc8090ac3 in start_thread (arg=) at ./nptl/pthread_create.c:442 +#18 0x00007f9fc8121a04 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:100 ``` -Under the backtrace of thread 13, you will find: +Under the backtrace of thread 14, you will find: ```text #8 -#9 holoscan::ops::PingTensorTxOp::compute (this=0x559716f26fa0, op_output=..., context=...) 
at ../examples/ping_distributed/cpp/ping_distributed_ops.cpp:129 +#9 holoscan::ops::PingTensorTxOp::compute (this=0x5643fdcbd540, op_output=..., context=...) at /workspace/holoscan-sdk/src/operators/ping_tensor_tx/ping_tensor_tx.cpp:139 ``` -This indicates that the segmentation fault occurred at line 129 in `examples/ping_distributed/cpp/ping_distributed_ops.cpp`. +This indicates that the segmentation fault occurred at line 139 in `/workspace/holoscan-sdk/src/operators/ping_tensor_tx/ping_tensor_tx.cpp`. To view the backtrace of all threads, use the `thread apply all bt` command. @@ -341,12 +342,12 @@ To view the backtrace of all threads, use the `thread apply all bt` command. (gdb) thread apply all bt ... -Thread 13 (Thread 0x7f995e3e3000 (LWP 2127105) "ping_distribute"): -#0 0x00007f9994a9642f in __GI___wait4 (pid=pid@entry=2127631, stat_loc=stat_loc@entry=0x7f995e3ddd3c, options=options@entry=0, usage=usage@entry=0x0) at ../sysdeps/unix/sysv/linux/wait4.c:30 +Thread 14 (Thread 0x7f9f4fffc000 (LWP 51) "worker"): +#0 0x00007f9fc80e642f in __GI___wait4 (pid=pid@entry=52, stat_loc=stat_loc@entry=0x7f9f4fff6cfc, options=options@entry=0, usage=usage@entry=0x0) at ../sysdeps/unix/sysv/linux/wait4.c:30 ... -Thread 12 (Thread 0x7f995ebe4000 (LWP 2127104) "ping_distribute"): -#0 0x00007f9994a96612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 +Thread 13 (Thread 0x7f9f53ffd000 (LWP 50) "async"): +#0 0x00007f9fc80e6612 in __libc_pause () at ../sysdeps/unix/sysv/linux/pause.c:29 ... 
``` diff --git a/docs/holoscan_operators_extensions.md b/docs/holoscan_operators_extensions.md index 0ecbdea..2febcf1 100644 --- a/docs/holoscan_operators_extensions.md +++ b/docs/holoscan_operators_extensions.md @@ -18,6 +18,8 @@ The operators below are defined under the `holoscan::ops` namespace for C++ and | **InferenceOp** | `inference` | {cpp:class}`C++ `/{py:class}`Python ` | | **InferenceProcessorOp** | `inference_processor` | {cpp:class}`C++ `/{py:class}`Python ` | | **PingRxOp** | `ping_rx` | {cpp:class}`C++ `/{py:class}`Python ` | +| **PingTensorRxOp** | `ping_tensor_rx` | {cpp:class}`C++ `/{py:class}`Python ` | +| **PingTensorTxOp** | `ping_tensor_tx` | {cpp:class}`C++ `/{py:class}`Python ` | | **PingTxOp** | `ping_tx` | {cpp:class}`C++ `/{py:class}`Python ` | | **SegmentationPostprocessorOp** | `segmentation_postprocessor` | {cpp:class}`C++ `/{py:class}`Python ` | | **VideoStreamRecorderOp** | `video_stream_recorder` | {cpp:class}`C++ `/{py:class}`Python ` | diff --git a/docs/hsdk_faq.md b/docs/hsdk_faq.md new file mode 100644 index 0000000..cca95d8 --- /dev/null +++ b/docs/hsdk_faq.md @@ -0,0 +1,1226 @@ +# Holoscan SDK FAQs + +## General + +**Q1: What is the Holoscan SDK?** + +A1: The Holoscan SDK is a comprehensive software development kit from NVIDIA designed for developing real-time AI applications, particularly in the healthcare sector. It includes acceleration libraries, pre-trained AI models, and reference applications for various medical imaging modalities like ultrasound, endoscopy, surgical robotics, and more. + +**Q2: What are the core components of the Holoscan SDK?** + +A2: The core components include: + +* **Application:** A collection of Fragments that acquire and process streaming data.A single fragment application executes in a single process while multi-fragment (distributed) applications can span multiple processes and/or physical nodes. 
+* **Fragments:** Directed graphs of Operators, which can be allocated to physical nodes in a Holoscan cluster. +* **Operators:** Basic units of work that process streaming data. +* **Conditions:** Components that determine conditions under which a given Operator will be considered ready to execute. +* **Resources:** Components that provide shared functionality which can be reused across operators. Examples are device memory pools, CUDA stream pools and components for serialization/deserialization of data. +* **Ports:** An operator's input ports are used to receive data from upstream operators. Input ports consist of a receiver and any associated conditions. An operator's output ports are used to emit data to downstream operators. An output port consists of a transmitter and any associated Conditions. + +**Q3: How is the Holoscan SDK different from other SDKs?** + +A3: The Holoscan SDK is a domain and sensor agnostic SDK optimized for the easy construction and deployment of high-performance, high bandwidth, and real-time AI applications. By marrying high speed instruments to NVIDIA software, Holoscan is the platform for a future of self-driving, software defined, and scalable sensor processing solutions, touching industries from scientific computing and instrumentation to medical devices. + + +## Installation and Setup + +**Q1: How do I install the Holoscan SDK?** + +A1: There are multiple ways to install the Holoscan SDK: + +* Using NGC containers : + * For **dGPU** (x86_64, IGX Orin dGPU, Clara AGX dGPU, GH200) + +``` +docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.4.0-dgpu +``` + + * For **iGPU** (Jetson, IGX Orin iGPU, Clara AGX iGPU) + +``` +docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.4.0-igpu +``` + +For more information, please refer to details and usage instructions on [**NGC**](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/clara-holoscan/containers/holoscan). 
+ +* Using Debian packages + +``` +sudo apt update +sudo apt install holoscan +``` + +If `holoscan` is not found, try the following before repeating the steps above: + +* **IGX Orin**: Ensure the [**compute stack is properly installed**](https://docs.nvidia.com/igx-orin/user-guide/latest/base-os.html\#installing-the-compute-stack) which should configure the L4T repository source. If you still cannot install the Holoscan SDK, use the [arm64-sbsa](https://developer.nvidia.com/holoscan-downloads?target_os=Linux\&target_arch=arm64-sbsa\&Compilation=Native\&Distribution=Ubuntu\&target_version=22.04\&target_type=deb_network) from the CUDA repository. +* **Jetson**: Ensure [**JetPack is properly installed**](https://developer.nvidia.com/embedded/jetpack) which should configure the L4T repository source. If you still cannot install the Holoscan SDK, use the [aarch64-jetson](https://developer.nvidia.com/holoscan-downloads?target_os=Linux\&target_arch=aarch64-jetson\&Compilation=Native\&Distribution=Ubuntu\&target_version=22.04\&target_type=deb_network) from the CUDA repository. +* **GH200**: Use the [arm64-sbsa](https://developer.nvidia.com/holoscan-downloads?target_os=Linux\&target_arch=arm64-sbsa\&Compilation=Native\&Distribution=Ubuntu\&target_version=22.04\&target_type=deb_network) from the CUDA repository. +* **x86_64**: Use the [x86_64](https://developer.nvidia.com/holoscan-downloads?target_os=Linux\&target_arch=x86_64\&Distribution=Ubuntu\&target_version=22.04\&target_type=deb_network) from the CUDA repository. + +Please note that , to leverage the python module included in the debian package (instead of installing the python wheel), include the path below to your python path. 
+ +``` +export PYTHONPATH="/opt/nvidia/holoscan/python/lib" +``` + +* Using Python wheels + +``` +pip install holoscan +``` + +For more details and troubleshooting , please refer to [PyPI](https://pypi.org/project/holoscan).For x86_64, ensure that the [CUDA Runtime is installed](https://developer.nvidia.com/cuda-12-2-2-download-archive?target\_os=Linux\&target\_arch=x86\_64\&Distribution=Ubuntu\&target_version=22.04). + +If you are unsure of which installation option to use, please refer to the considerations below: + +* The **Holoscan container image on NGC** is the safest way to ensure all the dependencies are present with the expected versions (including Torch and ONNX Runtime), and should work on most Linux distributions. It is the simplest way to run the embedded examples, while still allowing you to create your own C++ and Python Holoscan applications on top of it. These benefits come at a cost: + * Large image size due to the numerous (some of them optional) dependencies. If you need a lean runtime image, see the **section below**. + * Standard inconveniences that exist when using Docker, such as more complex run instructions for proper configuration. +* If you are confident in your ability to manage dependencies on your own in your host environment, the **Holoscan Debian package** should provide all the capabilities needed to use the Holoscan SDK, assuming you are using Ubuntu 22.04. +* If you are not interested in the C++ API but just need to work in Python, or want to use a different version than Python 3.10, you can use the **Holoscan Python wheels** on PyPI. While they are the easiest solution for installing the SDK, they might require the most work to set up your environment with extra dependencies based on your needs. Finally, they are only formally supported on Ubuntu 22.04, though they should support other Linux distributions with glibc 2.35 or above. 
+ +**Q2: What are the prerequisites for installing the Holoscan SDK?** + +A2: The prerequisites include: + +* If you are installing Holoscan SDK on a Developer kit, please refer to the details below + +| Developer Kit | User Guide | OS | GPU mode | +| :---- | :---- | :---- | :---- | +| [NVIDIA IGX Orin](https://www.nvidia.com/en-us/edge-computing/products/igx/) | [Link](https://developer.nvidia.com/igx-orin-developer-kit-user-guide) to User Guide | [IGX Software](https://developer.nvidia.com/igx-downloads) 1.0 Production Release | iGPU **or**\* dGPU | +| [NVIDIA Jetson AGX Orin and Orin Nano](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/jetson-orin/) | [Link](https://developer.nvidia.com/embedded/learn/jetson-agx-orin-devkit-user-guide/index.html) to User Guide | [JetPack](https://developer.nvidia.com/embedded/jetpack) 6.0 | iGPU | +| [NVIDIA Clara AGX](https://www.nvidia.com/en-gb/clara/intelligent-medical-instruments) | [Link](https://github.com/nvidia-holoscan/holoscan-docs/blob/main/devkits/clara-agx/clara_agx_user_guide.md) to User Guide | [Holopack](https://developer.nvidia.com/drive/sdk-manager) 1.2 | iGPU **or**\* dGPU | + +* If you are installing Holoscan SDK on NVIDIA SuperChips, please note that HSDK 2.2 has only been tested with the Grace-Hopper SuperChip (GH200) with Ubuntu 22.04. Follow setup instructions [**here**](https://docs.nvidia.com/grace-ubuntu-install-guide.pdf). 
+* If you are installing Holoscan SDK on Linux x86_64 workstations, please refer to the details below for supported distributions + +| OS | NGC Container | Debian/RPM Package | Python wheel | Build from source | +| :---- | :---- | :---- | :---- | :---- | +| Ubuntu 22.04 | Yes | Yes | Yes | No | +| RHEL 9.x | Yes | No | No | No | +| Other Linux distros | No | No | No | No | + + For specific NVIDIA discrete GPU (dGPU) requirements, please check below: + +* Ampere or above recommended for best performance +* [Quadro/NVIDIA RTX](https://www.nvidia.com/en-gb/design-visualization/desktop-graphics/) necessary for GPUDirect RDMA support +* Tested with [NVIDIA Quadro RTX 6000](https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/quadro-product-literature/quadro-rtx-6000-us-nvidia-704093-r4-web.pdf) and [NVIDIA RTX A6000](https://www.nvidia.com/en-us/design-visualization/rtx-a6000/) +* [NVIDIA dGPU drivers](https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes): 535 or above + +**Q3: Are there any additional setup steps required?** + +A3: Additional setup steps to achieve peak performance may include: + +* [Enabling RDMA](https://docs.nvidia.com/holoscan/sdk-user-guide/set_up_gpudirect_rdma.html) +* [Enabling G-SYNC](https://docs.nvidia.com/holoscan/sdk-user-guide/enable_gsync.html) +* [Disabling Variable Backlight](https://docs.nvidia.com/holoscan/sdk-user-guide/disable_variable_backlight.html) +* [Enabling Exclusive Display Mode](https://docs.nvidia.com/holoscan/sdk-user-guide/enable_exclusive_display.html) +* [Use both Integrated and Discrete GPUs on NVIDIA Developer Kits](https://docs.nvidia.com/holoscan/sdk-user-guide/use_igpu_with_dgpu.html) +* [Deployment Software Stack](https://docs.nvidia.com/holoscan/sdk-user-guide/deployment_stack.html) + + +## Getting Started + +**Q1: How do I get started with developing applications using the Holoscan SDK?** + +A1: To get started: + +1. Set up the SDK and your development environment. +1. 
Follow the "Getting Started" [guide](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_core.html) and tutorials provided in the SDK documentation. +1. Explore the [example applications](https://github.com/nvidia-holoscan/holohub/tree/main/applications) in Holohub to understand the framework and its capabilities. + +**Q2: Are there any pre-trained models available in the SDK?** + +A2: Yes, the SDK includes pre-trained AI models for various medical imaging tasks such as segmentation, classification, and object detection. These models can be fine-tuned or used directly in your applications. For more details, please refer to the endoscopy tool tracking [example](https://github.com/nvidia-holoscan/holohub/tree/main/applications/endoscopy_tool_tracking) in Holohub and the body pose estimation [example](https://github.com/nvidia-holoscan/holohub/tree/main/applications/body_pose_estimation). + + +## Development + +**Q1: How do I create a new application with the Holoscan SDK?** + +A1: To create a new application: + +1. Define the core concepts such as Operators, Fragments, and their interactions. +1. Use the provided templates and examples as a starting point. + +For more details, please refer to the [Holoscan by Example section](./holoscan_by_example.md). + +**Q2: What are some example applications provided in the SDK?** + +A2: Example applications include: + +* Hello World: Basic introduction to the SDK. +* Ping Simple and Ping Custom Op: Demonstrates simple data processing. +* Video Replayer: Shows how to process and display video streams. +* Medical imaging examples like ultrasound and endoscopy processing + +For a list of example applications, please visit the [Holoscan by Example section](./holoscan_by_example.md). + +**Q3: How can I integrate my own AI models into the Holoscan SDK?** + +A3: Integrating your own AI models involves: + +1. Converting your model to a compatible format (e.g., TensorRT, ONNX). +1. 
Ensuring that your data preprocessing and postprocessing steps align with the model's requirements.
+
+For more information on how to bring your own model in Holoscan SDK and build an inference example, please refer to [this](https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/examples/bring_your_own_model) example.
+
+**Q4: How can I update the VideoStreamReplayerOp and the VideoStreamRecorderOp to use a custom file format?**
+
+A4: Holoscan SDK depends on GXF. GXF uses the [Entity-Component-System](https://www.simplilearn.com/entity-component-system-introductory-guide-article) paradigm.
+Holoscan uses GXF as an execution engine and Holoscan's API abstracts Entity-Component-System and abstracts GXF node as Operator with input/output ports.
+
+![](images/image1.png)
+
+Most messages between Codelets (Operators in Holoscan) are also entity objects.
+An entity can hold multiple components/types. In VideoStreamReplayerOp and VideoStreamRecorderOp, an entity object that holds one or more GXF Tensor objects (as a component) is sent/received \- you can think of an entity as a dictionary of objects \-- a `name -> component` map. For VideoStreamReplayerOp and VideoStreamRecorderOp, it currently uses a custom file type ( `.gxf_entities` and `.gxf_index` files) to load and store a sequence of entity objects (in this case, an entity has one GXF Tensor object).
+ The `.gxf_index` file includes file offset/timestamp information for each entity and `.gxf_entities` includes a series of (serialized) entity data. Serializing/deserializing an entity object is done by using the `nvidia::gxf::EntitySerializer` class (with the StdEntitySerializer implementation) and the nvidia::gxf::FileStream endpoint. The official way to support GDS in GXF would be to extend the nvidia::gxf::FileStream class so it uses cuFile (GDS) internally.
+However, setting up the development environment wouldn't be straightforward. 
These are the steps you would need to follow:
+
+* Update VideoStreamRecorderOp::compute() to use your own implementation to save an entity (as a single tensor) to the file system.
+  * Example
+
+```c
+auto in_message = op_input.receive("in").value();
+```
+
+* Update VideoStreamReplayerOp::compute() to use your own implementation to read the file (with the custom format) and emit it as an entity (holding tensor(s) as a component \-- it is called TensorMap).
+  * Example:
+
+```c
+nvidia::gxf::Expected<nvidia::gxf::Entity> out_message =
+    CreateTensorMap(context.context(),
+                    pool.value(),
+                    {{out_tensor_name_.get(),
+                      nvidia::gxf::MemoryStorageType::kDevice,
+                      out_shape,
+                      out_primitive_type_,
+                      0,
+                      nvidia::gxf::ComputeTrivialStrides(out_shape, dst_typesize)}},
+                    false);
+```
+
+* You need to update initialize() and other methods to get rid of nvidia::gxf::FileStream and the nvidia::gxf::FileStream endpoint. For testing VideoStreamReplayerOp, you can just use the VideoReplayerApp example. You can develop/test/create operator (release) binaries by following the user guide: [https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/DEVELOP.md](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/DEVELOP.md)
+
+```c
+class VideoReplayerApp : public holoscan::Application {
+ public:
+  void compose() override {
+    using namespace holoscan;
+
+    // Sets the data directory to use from the environment variable if it is set
+    ArgList args;
+    auto data_directory = std::getenv("HOLOSCAN_INPUT_PATH");
+    if (data_directory != nullptr && data_directory[0] != '\0') {
+      auto video_directory = std::filesystem::path(data_directory);
+      video_directory /= "racerx";
+      args.add(Arg("directory", video_directory.string()));
+    }
+
+    // Define the replayer and holoviz operators and configure using yaml configuration
+    auto replayer =
+        make_operator<ops::VideoStreamReplayerOp>("replayer", from_config("replayer"), args);
+    auto visualizer = make_operator<ops::HolovizOp>("holoviz", from_config("holoviz"));
+
+    // Define the workflow: replayer -> holoviz
+    
add_flow(replayer, visualizer, {{"output", "receivers"}}); + } +}; + + +``` + + +``` +./run build +./run launch + # inside the container + ./examples/video_replayer/cpp/video_replayer +``` + +* As an alternative, you can create a separate Holoscan Operator and apply it with other sample applications (such as endoscopy tool tracking app) by following HoloHub's guide ([https://github.com/nvidia-holoscan/holohub](https://github.com/nvidia-holoscan/holohub)).You can also use Holoscan SDK's installation binary with [holoscan install dir](https://github.com/nvidia-holoscan/holohub?tab=readme-ov-file\#glossary) created by ./run build with Holoscan SDK repo. + +**Q5: How can I use the Inference Operator with Python tensor?** + +A5: The Inference Operator accepts holoscan::Entity or holoscan::TensorMap (similar to the dictionary of Array-like objects in Python) as an input message. + +For example, you can define an operator processing input video (as a tensor).You can find a more detailed example of this type of operator together with an example by referencing the tensor interop [example](https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/examples/tensor_interop/python). + + This operator has: + inputs: "input_tensor" + outputs: "output_tensor" + + The data from each input is processed by a CuPy gaussian filter and + the result is sent to the output. 
+ + ``` + def compute(self, op_input, op_output, context): + # in_message is of dict + in_message = op_input.receive("input_tensor") + + # smooth along first two axes, but not the color channels + sigma = (self.sigma, self.sigma, 0) + + # out_message is of dict + out_message = dict() + + for key, value in in_message.items(): + print(f"message received (count: {self.count})") + self.count += 1 + + cp_array = cp.asarray(value) + + # process cp_array + cp_array = ndi.gaussian_filter(cp_array, sigma) + + out_message[key] = cp_array + + op_output.emit(out_message, "output_tensor") + ``` + +**Q6: Is there support in the Holoscan SDK, particularly for models written as Triton Python backends like NVIDIA's FoundationPose?** + +A6: Triton backends are not currently supported.The Inference Operator supports TensorRT (trt), ONNX Runtime (onnxrt), and Torch backends. + + For more information on the Inference Operator please refer to the [section](https://docs.nvidia.com/holoscan/sdk-user-guide/inference.html) in the User Guide regarding the Inference Operator . + +**Q7: Can I directly use a .pth (PyTorch) model file with the Holoscan SDK's inference operator?** +A7:No, you cannot use a .pth model file directly with the Holoscan SDK. Here's why and what you can do instead: + +1. Holoscan SDK's Torch backend is based on libtorch, which requires models to be in TorchScript format. +1. Converting a .pth model to TorchScript is a manual process and cannot be done automatically within the SDK. +1. For the best performance and ease of use, it's recommended to: a) Use a TensorRT (TRT) model if available. b) If you have an ONNX model, you can convert it to TRT automatically within the SDK. +1. Using a TRT model (or converting from ONNX to TRT) will likely provide the fastest inference and be the easiest to set up with the Holoscan SDK. 
+
+In summary, while direct .pth file usage isn't supported, converting to TensorRT or using ONNX with automatic TRT conversion are the recommended approaches for optimal performance and compatibility with the Holoscan SDK.
+
+**Q8: Can I use multiple models with the Inference Operator?**
+
+A8: Yes, you can use multiple models by specifying them in the `model_path_map` parameter. For more information, please refer to the [Parameters section](https://docs.nvidia.com/holoscan/sdk-user-guide/inference.html\#parameters-and-related-features) of the Inference Operator in the Holoscan User Guide.
+
+**Q9: How can I enable parallel inference for multiple models?**
+
+A9: Parallel inference is enabled by default. To disable it, set `parallel_inference`: false in the parameter set. For more information, please refer to the [Parameters section](https://docs.nvidia.com/holoscan/sdk-user-guide/inference.html\#parameters-and-related-features) of the Inference Operator in the Holoscan User Guide.
+
+**Q9: Can I use different backends for different models in the same application?**
+
+A9: Yes, you can specify different backends for different models using the `backend_map` parameter. For more information, please refer to the [Parameters section](https://docs.nvidia.com/holoscan/sdk-user-guide/inference.html\#parameters-and-related-features) of the Inference Operator in the Holoscan User Guide.
+
+**Q10: Can I perform inference on the CPU?**
+
+A10: Yes, you can perform inference on the CPU by setting `infer_on_cpu`: true and using either the ONNX Runtime or PyTorch backend. For more information, please refer to the [Parameters section](https://docs.nvidia.com/holoscan/sdk-user-guide/inference.html\#parameters-and-related-features) of the Inference Operator in the Holoscan User Guide. 
+ +**Q11:Can I control where the input and output data is stored (CPU vs GPU memory)?** + +A11: Yes, use the `input_on_cuda,` `output_on_cuda`, and `transmit_on_cuda` parameters to control data location.For more information, please refer to the [Parameters section](https://docs.nvidia.com/holoscan/sdk-user-guide/inference.html\#parameters-and-related-features) of the Inference Operator in the Holoscan User Guide. + +**Q12: How can I use the Optional flag?** + +A12: In Python, there are two ways to define parameter: + +* Using spec.param() method in Python's setup() method of the operator , usually done when wrapping the existing C++ operator. +* Parameters are passed to the Constructor (`__init__()` ) directly. In Python there is no `try_get()` method in the parameter. + Instead, the default value is set to None, allowing us to check whether the parameter is set by users by verifying if the parameter value is None. + +**Q13:How can I define an Operator's creator for passing custom arguments?** + +A13:Feeding custom data to the constructor of an Operator in the `compose()` method is crucial. When you use the `make_operator<>()` template method in C++ or the Python Operator constructor, the `setup()` method is called internally, which prevents you from passing custom data (such as configuration values) after `make_operator<>()` is called. In C++, to pass non-condition/argument data to the constructor of a C++ Operator class, you need to define an additional constructor to accept your custom data. For example, you can define a constructor that accepts a `std::vector` argument for the list of output port names as a second parameter. 
+ +**Q14:How can I stop an application?** + +A14:There are two approaches to stopping an application: + +* using BooleanCondition on replayer operator + +```c + + std::string op_name = "replayer"; + std::string param_name = "boolean_scheduling_term"; + + // get the operator + holoscan::Operator* op = nullptr; + auto& app_graph = fragment()->graph(); + if (!app_graph.is_empty()) { op = app_graph.find_node(op_name).get(); } + if (!op) { + HOLOSCAN_LOG_ERROR("Operator '{}' is not defined", op_name); + return; + } + + // Stop executing compute() for 'replayer' operator + + auto boolean_condition = op->condition(param_name); + boolean_condition->disable_tick(); + + // Stop executing compute() for this operator + boolean_condition = condition(param_name); + boolean_condition->disable_tick(); + return; + +``` + +To terminate the application smoothly, it is recommended to rely on the stop-on-deadlock feature in the scheduler. By default, the `stop_on_deadlock` parameter of `GreedyScheduler` is set to true. In case the `VideoReplayer` Operator stops, the entire pipeline will stop. + +* using interrupt() + +```c +fragment()->executor().interrupt(); +``` +Please note that using interrupt() forces to terminate the execution and can cause error messages, +and the recommendation is using deadlock-based approach. + +As an alternative, you can also use the `CountCondition`.Please refer to the [section](https://docs.nvidia.com/holoscan/sdk-user-guide/components/conditions.html#countcondition). +At a high level, this is how attaching a `CountCondition` to an operator works: + +The operator starts in a READY state. +Each time the operator executes, the count decreases by 1. +When the count reaches 0, the operator's state changes to NEVER. +In the NEVER state, the operator stops executing. 
+ +For example, if you want to run the application 100 times and then stop it: + +```c +auto my_operator = make_operator("my_operator", make_condition(100)); +``` + + + + +**Q15:How can I loop an output.emit() call within the operator?** + +A15: Each input or output port has its own queue. Internally, the process works as follows: + +1. Before the `compute()` method of an operator A is triggered, for each input port (usually backed by `DoubleBufferReceiver`), data (messages) in the backstage of the input port's queue are moved to the main stage of the queue. This is done using `router->syncInbox(entity)`. +1. The `compute()` method of operator A is triggered. +1. For each output port of operator A, data in the output port's queue are moved to the queue (backstage) of the downstream operator's input port using `router->syncOutbox(entity)`. + +By default, the queue capacity of the input/output port is set to 1, although this can be configured in the `setup()` method. This is why we cannot call `output.emit()` multiple times in a `compute()` method, as doing so can cause a `GXF_EXCEEDING_PREALLOCATED_SIZE` error. + +With the `GreedyScheduler`, which is the default scheduler using a single thread to trigger an operator's `compute()` method, no other operator can be scheduled until the `compute()` method of the current operator returns. + +To address this challenge, we might consider creating a utility method or class designed to accept a generator or iterator object. This approach would be particularly effective within a `compute()` method, especially if the operator is a source operator without input ports. It would enable the method to preserve the state of the input and either call `output.emit()` for each yielded value in a single `compute()` invocation or return without blocking the thread. 
+ +The Python API code to override the connector would be something like this if we wanted a queue with capacity 20 and policy of "reject" (discard) the item if the queue is full: + +```py +from holoscan.core import IOSpec + +# and then within the setup method define the output using the connector method like this + +spec.output("out1").connector( + IOSpec.ConnectorType.DOUBLE_BUFFER, capacity=20, policy=1 +) +``` + +For the policy options: + +* 0 \= pop (if the queue is full, remove an item from the queue to make room for the incoming one) +* 1 \= reject (if the queue is full, reject the new item) +* 2 \= fault (terminate the application if the queue is full and a new item is added) + +For completeness, to explicitly specify both the connector and its conditions, the syntax should be: + +```py +# The default setting for an output should be equivalent to explicitly specifying +spec.output("out1").connector( + IOSpec.ConnectorType.DOUBLE_BUFFER, capacity=1, policy=2 +).condition( + ConditionType.DOWNSTREAM_MESSAGE_AFFORDABLE, min_size=1, front_stage_max_size=1 +) + +``` + +**Q16: How can I add a green border and a small image to a corner to a Holoviz Operator?** + +A16: You can follow the Holoviz examples here: + +* Holoviz geometry example : [https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/examples/holoviz/python/holoviz_geometry.py](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/examples/holoviz/python/holoviz_geometry.py) for border examples +* Holoviz views example [https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/examples/holoviz/python/holoviz_views.py](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/examples/holoviz/python/holoviz_views.py) for view/image example + +**Q17 : What is the difference between `setup` vs `initialize` vs `__init__` ?** + +A17: Since v0.6 release, Holoscan Operator does "lazy initialization" and Operator instance creation ( `super().__init__(*args, **kwargs)` ) doesn't initialize (calling 
Operator.initialize(self) ) the corresponding GXF entity anymore.
+Currently, [setting the class members in Python](https://github.com/nvidia-holoscan/holoscan-sdk/blob/ccead5608b6f00d1c5b40465f68904d550c65236/python/holoscan/core/arg.cpp#L68) is done when the Operator is [initialized by the GXF Executor](https://github.com/nvidia-holoscan/holoscan-sdk/blob/ccead5608b6f00d1c5b40465f68904d550c65236/src/core/executors/gxf/gxf_executor.cpp#L1650). The purpose of the setup method is to get the "operator's spec" by providing an OperatorSpec object (the spec param) to the method. When `__init__` is called, it calls C++'s `Operator::spec(const std::shared_ptr<OperatorSpec>& spec)` method (and also sets the `self.spec` class member), and calls the `setup` method so that the Operator's `spec()` method holds the operator's specification.
+Since the setup method can be called multiple times with another OperatorSpec object (e.g., to enumerate the description of the operator), the user shouldn't initialize anything in the Operator object in the setup method.
+Such initialization needs to be done in the initialize method.
+The `__init__` method is for creating the Operator object. It can be used for initializing the operator object itself by passing miscellaneous arguments, but it doesn't 'initialize' the corresponding GXF entity object.
+
+**Q18: I’d like to use a CUDA stream allocated by the Holoscan SDK in a non-Holoscan library (OpenCV, CuPy, PyTorch). All these 3rd party libraries support CUDA streams, allocators etc. but they have different objects to represent that CUDA Stream (such as a `cupy.cuda.Stream`). I need to get the Holoscan CUDA stream and convert it to a `cupy.cuda.Stream` in a similar way a Holoscan Tensor is converted to a CuPy array with memory pointers. Please propose a solution.**
+
+A18: There is a CudaStreamHandler utility that works via GXF APIs in the C++ layer. 
We have not currently created a Python API to allow users to use it from the compute methods of native Python operators.In general, the underlying GXF library is currently refactoring how CUDA streams are handled and we plan to then improve the stream handling on Holoscan SDK after that.You can use CuPy or other 3rd party stream APIs within their own native operators and pass the stream objects as a Python object between your own native operators. I think this doesn't help with the issue you are facing as you want to reuse a stream allocated by some upstream wrapped C++ operator provided by the SDK there is currently no proper way to do that from Python. + +**Q19:What is the purpose of the `activation_map` parameter in the Holoscan Holoinfer operator?** +A19: The `activation_map` parameter allows users to enable or disable model inferences dynamically at runtime. It can be used to decide on which frames to run inference for each model. + +**Q20:Is there an existing example or template that demonstrates the simultaneous use of integrated GPU (iGPU) and discrete GPU (dGPU) in a Holoscan application pipeline? Specifically, I am looking for a sample workflow that includes:** + +1. **Receiving and processing data on the iGPU of an AGX Orin** +1. **Transferring the processed data to a dGPU** +1. **Running multiple AI models on the dGPU** +1. **Displaying results using the dGPU** + +A20:To leverage both the integrated GPU (iGPU) and discrete GPU (dGPU) on your IGX system with Holoscan, please refer to the [IGX user guide](https://docs.nvidia.com/igx-orin/user-guide/latest/igpu-dgpu.html). This guide provides detailed instructions on utilizing the iGPU in containers when the IGX developer kit is configured in dGPU mode. + +For Holoscan applications, there are two primary approaches to utilize both GPUs: + +1. Concurrent Application Execution: Run separate applications simultaneously, as outlined in the IGX documentation. 
The iGPU application must be executed within the Holoscan iGPU container, while the dGPU application can be run either natively or within the Holoscan dGPU container. +1. Distributed Application: Develop a single distributed application that utilizes both GPUs by executing distinct fragments on the iGPU and dGPU respectively. + +To illustrate the second approach, consider the following example using the 'ping' distributed application. This demonstrates communication between the iGPU and dGPU using Holoscan containers: + +``` +COMMON_DOCKER_FLAGS="--rm -i --init --net=host +--runtime=nvidia -e NVIDIA_DRIVER_CAPABILITIES=all +--cap-add CAP_SYS_PTRACE --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 +" +HOLOSCAN_VERSION=2.2.0 +HOLOSCAN_IMG="nvcr.io/nvidia/clara-holoscan/holoscan:v$HOLOSCAN_VERSION" +HOLOSCAN_DGPU_IMG="$HOLOSCAN_IMG-dgpu" +HOLOSCAN_IGPU_IMG="$HOLOSCAN_IMG-igpu" + +# Pull necessary images +docker pull $HOLOSCAN_DGPU_IMG +docker pull $HOLOSCAN_IGPU_IMG + +# Execute ping distributed (Python) in dGPU container +# Note: This instance serves as the 'driver', but the iGPU could also fulfill this role +# The '&' allows for non-blocking execution, enabling subsequent iGPU command +docker run \ + $COMMON_DOCKER_FLAGS \ + $HOLOSCAN_DGPU_IMG \ + bash -c "python3 ./examples/ping_distributed/python/ping_distributed.py --gpu --worker --driver" & + +# Execute ping distributed (C++) in iGPU container +docker run \ + $COMMON_DOCKER_FLAGS \ + -e NVIDIA_VISIBLE_DEVICES=nvidia.com/igpu=0 \ + $HOLOSCAN_IMG-igpu \ + bash -c "./examples/ping_distributed/cpp/ping_distributed --gpu --worker" + +``` + +**Q21:Is there an efficient method to configure Holoscan to enable concurrent processing pipelines? My objective is to implement a system where frame acquisition and processing can occur simultaneously. 
Specifically, I aim to initiate the reading of a subsequent frame while the current frame is still undergoing processing through the InferenceOp.To illustrate:** + +![](images/image2.png) + +1. **Is it possible to begin reading Frame N+1 while Frame N is still being processed by the InferenceOp?** +1. **Or does Holoscan require the completion of all operations on Frame N before initiating any operations on Frame N+1?** + +**If concurrent processing is achievable, what would be the recommended approach to implement such a system within the Holoscan framework?** + +A21: The NVIDIA GXF framework provides a `nvidia::gxf::BroadcastCodelet` with a "round robin" mode that offers an alternative to the standard broadcast behavior. This mode sequentially directs input messages to different output ports in rotation. While this functionality was accessible in Holoscan 2.1 through the GXFCodeletOp, we could also develop a native operator that provides equivalent utility. + +The GXF source defines the modes as follows: + +``` +enum struct BroadcastMode { + kBroadcast = 0, // publishes incoming message to all transmitters + kRoundRobin = 1, // publishes incoming message to one transmitter in round-robin fashion +}; + +``` + +For the gathering operation, GXF implements the `nvidia::gxf::Gather` codelet. This codelet transfers any messages from the receive ports to the output port. The `tick_source_limit` parameter can be configured to cease checking for additional received messages on other ports once a specified maximum number of messages to output has been reached. + +It's important to note that the GXF Gather codelet may not inherently preserve the order in which inference operations were called. While messages might naturally be processed in the order they were received if inference operations complete sequentially, this behavior is not guaranteed. + +To ensure strict ordering, we could develop a custom native operator. 
This operator would sequentially check for messages on each port, beginning with port 1, and only move to the next port once a message has been received on the current port. This approach would guarantee that frames are processed and displayed in the correct order. + +This custom implementation would provide greater control over the message flow and ensure the integrity of the processing pipeline, particularly in scenarios where maintaining the original frame order is critical. + +**Q22: How can I use other libraries in my Holoscan SDK application pipeline?** +A22: Refer to the dedicated [HoloHub tutorial](https://github.com/nvidia-holoscan/holohub/tree/main/tutorials/integrate_external_libs_into_pipeline) for an overview of how to use external C++ or Python libraries in your custom Holoscan SDK application. + +**Q23: How can I ensure proper data flow and handling in a Holoscan pipeline with branching paths, especially when inline updates are performed on shared data?** + +A23:In a Holoscan pipeline with branching paths, such as: + +``` +A -> B -> C -> D + \ + -> E +``` + +There are several considerations and potential solutions to ensure proper data flow and handling, especially when operators like C or D perform inline updates to the data. + +1. Data Access Guarantee: E is guaranteed to access the data egressing from B. However, in a multithreaded scheduler, careful attention must be paid to potential data race conditions. +1. Execution Order: The current implementation maintains the order of root nodes, but the creation of GXF connections between B \-\> C and B \-\> E is randomly determined due to the use of `std::unordered_map` as the graph data structure. This randomness can affect which connection is prioritized in GXF. +1. Potential Solutions: a. PeriodicCondition: One approach is to use a PeriodicCondition to control the execution timing of operators. 
Here's an example: + +``` +from holoscan.conditions import CountCondition, PeriodicCondition +from holoscan.core import Application +from holoscan.operators import PingRxOp, PingTxOp + +class MyPingApp(Application): + def compose(self): + b = PingTxOp(self, CountCondition(self, 10), name="B") + c = PingRxOp(self, PeriodicCondition(self, 20_000_000), name="C") + e = PingRxOp(self, name="E") + + self.add_flow(b, c) + self.add_flow(b, e) + +``` + +In this example, the PeriodicCondition is used to ensure that C executes only after a specified period (20 milliseconds in this case) has elapsed. This can help control the timing of data processing between different branches. b. Custom Operator: Developing a custom native operator could provide more control over the message flow. This operator could sequentially check for messages on each port, ensuring that frames are processed and displayed in the correct order. c. Data Copying: To avoid issues with inline updates affecting shared data, consider implementing a mechanism to create copies of the data for each branch. This ensures that modifications in one branch don't unintentionally affect the other. + +1. Limitations and Considerations: +* The GXF Gather codelet may not inherently preserve the order in which inference operations were called. +* When using mock objects for testing (as mentioned in the background information), modifying the original pipeline structure might be challenging. In such cases, focusing on data copying or careful timing control might be more feasible. +1. Future Improvements: Updating the graph structure to use std::map instead of `std::unordered_map` for `succ_` and `pred_` could potentially provide more predictable behavior in terms of connection creation order. + +**Q24:I'm inquiring about the availability of a Holoscan example that demonstrates distributed processing across two networked computers. Specifically, I'm interested in a scenario where:** + +1. 
**One computer captures a frame, potentially using an AJA capture card.** +1. **The captured frame is then transferred over a local network to a second computer.** +1. **The second computer receives and displays the frame.** + +**Additionally, I have some questions regarding the networking aspects of such a setup:** + +1. **Does Holoscan provide flexibility in selecting the transport layer for this inter-computer communication?** +1. **Is it possible to utilize WebRTC as the transport protocol in this scenario?** + +A24: There are two relevant approaches: + +1. WebRTC Implementation: A reference application demonstrating WebRTC-based video streaming is available in the HoloHub repository. You can find this example at: [https://github.com/nvidia-holoscan/holohub/tree/main/applications/webrtc_video_server](https://github.com/nvidia-holoscan/holohub/tree/main/applications/webrtc_video_server) This application showcases how WebRTC can be utilized for inter-computer communication within the Holoscan framework. +1. Distributed Application Approach: An alternative method involves creating a distributed application with separate fragments running on each node. For more information, please refer to the [section](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_create_distributed_app.html) in the User guide on Creating Distributed applications . + +**Q25: How can I use `run_async()` to launch an application in a separate thread and stop the application?** + +A25:We can set the event state to `EVENT_WAITING` (request sent to an async service, pending event done notification) and then `EVENT_DONE` (event done notification received, entity ready to be ticked) to allow a specific operator to wait/resume its operation. In the example, it calls `AsynchronousCondition::event_state(AsynchronousEventState::EVENT_NEVER)` to set the status of the condition to NEVER. 
(BooleanCondition does the same by setting the status of the condition to NEVER when `BooleanCondition::disable_tick()` is called). This means the operator does not want to be ticked again (end of execution).Once the state of the condition goes to NEVER (internally, SchedulingConditionType::NEVER), it marks the end of execution and cannot be undone. + +**Q26:Are there any existing applications or examples in the Holoscan ecosystem that demonstrate the ability to utilize multiple GPUs concurrently within a single application?** + +A26:The multi ai ultrasound application has [settings](https://github.com/nvidia-holoscan/holohub/blob/f5f27b85b68bc8fcd62239ed56da2cef29a3d45f/applications/multiai_ultrasound/cpp/mgpu_multiai_ultrasound.yaml#L65) for multi GPU in a different YAML file. It can be controlled by the inference parameters. + +**Q27: What is the role of a scheduler in Holoscan?** + +A27: The scheduler is responsible for determining when each operator in an application will execute. + +**Q28: How many types of schedulers are available in the Holoscan SDK?** + +A28: There are three [schedulers](https://docs.nvidia.com/holoscan/sdk-user-guide/components/schedulers.html) available: Greedy Scheduler, Multi-Thread Scheduler, and Event-Based Scheduler. + +**Q29:Which scheduler is used by default for non-distributed applications?** + +A29:Non-distributed applications use the Greedy Scheduler by default. + +**Q30:What is the main characteristic of the Greedy Scheduler?** + +A30: The Greedy Scheduler has only a single worker thread that executes operators sequentially in a deterministic order. + +**Q31:How does the Multi-Thread Scheduler work?** + +A31: It's a polling-based scheduler with a user-defined number of worker threads and a dedicated thread that polls operators at a user-defined interval. 
+ +**Q32:What is unique about the Event-Based Scheduler?** + +A32:The Event-Based Scheduler waits for events indicating changes in operator readiness, rather than constantly polling. + +**Q33:How can the Event-Based Scheduler reduce CPU overhead?** + +A33:By eliminating the need for constant polling, it can significantly reduce CPU usage compared to the Multi-Thread Scheduler in certain scenarios. + +**Q34:In what situations do Multi-thread and Event-Based Schedulers show benefits?** + +A34:They show benefits in scenarios with multiple operators that can run simultaneously, potentially providing significant speedup compared to the Greedy Scheduler. + +**Q35:How do the Multi-Thread and Event-Based Schedulers compare in terms of performance?** + +A35:They often have similar runtime performance, but the Event-Based Scheduler tends to have lower CPU overhead on average. + +**Q36:Are there scenarios where using multi-thread schedulers might not be beneficial?** + +A36: Yes, for linear inference pipelines or applications with minimal computation per operator, multi-thread schedulers might not provide significant benefits and could even introduce overhead. + +**Q37: How does the number of worker threads affect performance in multi-thread schedulers?** + +A37: Increasing the number of worker threads can improve performance up to a point, but it also increases CPU usage. + + +## Performance + +**Q1: What performance tools are available in Holoscan SDK?** + +A1: Holoscan SDK provides several performance tools, including Data Flow Tracking, GXF job statistics, and a Video Pipeline Latency Tool. 
+ +**Q2: What is Data Flow Tracking in Holoscan SDK?** + +A2: Data Flow Tracking is a mechanism to profile applications and analyze fine-grained timing properties and data flow between operators in a fragment's graph.For more detailed information, please refer to the [Data Flow Tracking section](https://docs.nvidia.com/holoscan/sdk-user-guide/flow_tracking.html) in the Holoscan User Guide. + +**Q3: How do I enable Data Flow Tracking in my Holoscan application?** + +A3:You can enable Data Flow Tracking by calling the `track()` method in C++ or using the `Tracker` class in Python before running your application. + +**Q4: What metrics can I retrieve using Data Flow Tracking?** + +A4: You can retrieve metrics such as maximum, average, and minimum end-to-end latencies, as well as the number of messages sent from root operators. + +**Q5: How can I customize Data Flow Tracking?** + +A5:You can customize Data Flow Tracking by configuring parameters such as the number of messages to skip at the start and end, and setting a latency threshold to ignore outliers. + +**Q6: How do I enable GXF job statistics?** + +A6:You can enable GXF job statistics by setting the environment variable `HOLOSCAN_ENABLE_GXF_JOB_STATISTICS` to true. + +**Q7: Can I save GXF job statistics to a file?** + +A7:Yes, you can save GXF job statistics to a JSON file by setting the `HOLOSCAN_GXF_JOB_STATISTICS_PATH` environment variable. For more information on the GXF job statistics, please refer to [this](https://docs.nvidia.com/holoscan/sdk-user-guide/gxf_job_statistics.html) section in the User Guide. + +**Q8: How does the Video Pipeline Latency Tool work?** + +A8:It generates a sequence of known video frames, transfers them back to an input component using a physical loopback cable, and measures the latency at various stages of the process. 
+ +**Q9: Can the Video Pipeline Latency Tool simulate GPU workload?** + +A9: Yes, the tool has an option to simulate GPU workload by running an arbitrary CUDA loop a specified number of times before frame generation. For more information on the Video Pipeline Latency Tool, please refer to [this](https://docs.nvidia.com/holoscan/sdk-user-guide/latency_tool.html) section in the User Guide. + +**Q10: What types of producers and consumers are supported by the Video Pipeline Latency Tool?** + +A10: The tool supports various producers (OpenGL GPU Direct Rendering, GStreamer GPU Rendering, AJA Video Systems) and consumers (V4L2, GStreamer, AJA Video Systems) for different video input/output scenarios. + +**Q11: How do NVTX markers work in Holoscan SDK?** + +A11: This is how NVTX markers added to a Holoscan application work: + +* The multithreaded scheduler starts a worker thread, checks the status of entities (also known as Holoscan Operators), and executes each entity using the `EntityExecutor::executeEntity()` method in GXF. +* `EntityExecutor::executeEntity()` calls the `EntityExecutor::EntityItem::execute()` GXF method for the given entity ID. +* The `EntityExecutor::EntityItem::execute()` method checks the scheduling status of the entity (Holoscan Operator) and then calls the `EntityExecutor::EntityItem::tick()` method in GXF. +* The EntityExecutor::EntityItem::tick() method is where the annotation happens; the following steps occur: +1. router-\>syncInbox(entity); is called to synchronize the inbox. For example, for the given Holoscan operator, UCXReceiver (input port) receives data from the network and pushes it into the queue in the UCXReceiver object. Data in the queue can be retrieved by calling the receive() method within the Operator::compute() method. +1. 
For each codelet in the entity (in Holoscan, an entity can have only one codelet), EntityExecutor::EntityItem::tickCodelet() is called, which in turn calls Codelet::tick() (in Holoscan, this is the Operator::compute() method) (Figure 5). +1. router-\>syncOutbox(entity); is called to synchronize the outbox. For example, for the given Holoscan operator, the data pushed to the queue in the UCXTransmitter object (output port) via emit() method calls in the Operator::compute() method is sent to the network using UCX. + +During these calls, the system measures statistics, executes monitors (if any), and executes controllers (if any). + +It is important to note that the tick codelet NVTX annotation doesn't cover router-\>syncInbox(entity); and router-\>syncOutbox(entity);. This means that the time range captured by NVTX measures only the duration for executing the Codelet::tick(). The time for sending and receiving data via UCXTransmitter/UCXReceiver is not measured by looking at the annotation range. + +**Q12:During the performance analysis of my Holoscan application, I've observed significant latency issues that are negatively impacting real-time performance. I've compiled a timing breakdown for each operator in the pipeline, which I've included below for reference.** + +**Initially, I had assumed that each Holoscan Operator processed frames independently and concurrently. However, my observations suggest that the entire pipeline is processing each frame sequentially, which appears to be suboptimal for my real-time requirements.** + +**Currently, my visualization component is only achieving approximately 15 fps, which falls short of my performance target. 
Given that my pipeline has a total execution time of approximately 70ms, I'm concerned that it may only be capable of processing one frame every 70ms.** + +**Could you provide more detailed information about the implementation and potential benefits of Schedulers in Holoscan (as referenced in the NVIDIA documentation on Schedulers)? Specifically, I'm interested in understanding if and how Schedulers could be leveraged to address my performance concerns.** + +**Here's the timing breakdown for each operator in my pipeline:** + +* **Replayer: 24.145 ms** +* **ImageProcessing: 18.289 ms** +* **Preprocessor: 1.213 ms** +* **Inference: 23.861 ms** +* **Postprocessor: 0.275 ms** +* **PostImageProcessing: 2.695 ms** +* **Viz: 1.575 ms** + +A12: The following scheduler mechanisms can potentially impact the performance of your application: + +1. Frame Processing Parallelization: The Multi-threaded Scheduler (MTS) and Event-based Scheduler (EBS) are designed to enable concurrent processing of multiple frames. These schedulers allow the initiation of processing for subsequent frames while preceding frames are still being processed by other operators in the pipeline. +1. Latency vs. Throughput Considerations: It's important to distinguish between end-to-end latency and throughput in the context of application performance. While MTS and EBS can potentially enhance the overall throughput of an application (defined as the number of frames processed per unit time), they do not necessarily reduce the end-to-end latency. End-to-end latency refers to the time required for a single frame to traverse the entire pipeline from source to sink. +1. Current Scheduler Implementation Status: Please note that the MTS and EBS are currently undergoing optimization. In their present state, they may exhibit higher latency compared to the greedy scheduler. +1. Inter-Operator Dependencies: It's crucial to understand that operators within a pipeline do not function in complete isolation. 
The pipeline architecture incorporates double-buffer queues between operators, with scheduling conditions applied to these queues. This design introduces data dependencies between adjacent nodes in the application graph, which influences the overall execution flow and timing. + +**Q13:How can I improve the performance of VideoStreamRecorderOp by reusing GPU memory?** + +A13:The VideoStreamReplayerOp can reuse the CUDA device buffers and avoid alloc/free for each frame by using [BlockMemoryPool](https://docs.nvidia.com/holoscan/sdk-user-guide/components/resources.html\#blockmemorypool). + +The VideoStreamReplayerOp does not have a parameter (such as allocator) to use a custom allocator, even though the user can specify `entity_serializer`. + +* The current implementation always uses holoscan::StdEntitySerializer and holoscan::StdComponentSerializer with UnboundedAllocator, regardless of the user-specified `entity_serializer` parameter. +* The storage type of the tensor (GPU or CPU) created by the VideoStreamReplayerOp depends on the input video file to which the tensor object is serialized. Therefore, without updating the `holoscan::StdComponentSerializer` implementation, VideoStreamReplayerOp cannot blindly use a specific memory pool allocator that requires memory storage type, memory pool size, etc. + +**Q14: I've observed some CUPVA-CUDA interop-related latencies in this application that are not present in our CUPVA test applications. 
One notable difference between the Holohub application and the CUPVA test application lies in the method of CUDA stream creation.** + +**In the CUPVA test application, we create the CUDA stream as follows:** + +``` +cudaStreamCreateWithFlags(&cuStream, cudaStreamNonBlocking) +``` + +**In contrast, the Holohub application utilizes a CudaStreamPool, created in this manner:** + +``` +const std::shared_ptr cuda_stream_pool = make_resource("cuda_stream", 0, 0, 0, 1, 5); +// or +const std::shared_ptr cuda_stream_pool = make_resource("cuda_stream", 0, cudaStreamNonBlocking, 0, 1, 5); + +``` + +**The CUDA stream is then retrieved in the compute API using:** + +``` +auto cudaStream = cuda_stream_handler_.get_cuda_stream(context.context()); +``` + + **I would like to clarify the following points:** + +1. **When calling `get_cuda_stream(..)`, will the default stream be utilized, or will it be a non-default stream?** +1. **Is there a method to ensure that the default CUDA stream is not used in the pool, given that we currently lack support for it in CUPVA?** + +A14:It is important to know that CUDA stream management in both GXF and Holoscan is currently in a state of evolution. + +In the current Holoscan implementation, CUDA streams are managed through the `holoscan::CudaStreamHandler` class. This class offers utility methods to define the operator specification with `CudaStreamPool`. + +To utilize non-default streams, the application's `compose()` method should create a `cuda_stream_pool` as follows: + +``` +const std::shared_ptr cuda_stream_pool = make_resource("cuda_stream", 0, cudaStreamNonBlocking, 0, 1, 5); +``` + +Note that using `1` or `cudaStreamNonBlocking` for the flag parameter ensures the use of non-default streams. + +For proper CUDA stream creation and sharing across operators in the application workflow: + +1. In the Application's `compose()` method: + * Create `CudaMemoryPool` and pass it as a parameter to the operators in the workflow graph. +1. 
For each operator in the workflow: + * Define a `holoscan::CudaStreamHandler` member variable in the operator class (e.g., `cuda_stream_handler_`). + * Call `cuda_stream_handler_.define_params(spec);` in the `setup(OperatorSpec& spec)` method. + * In the `compute()` method, use the following calls: + * `cuda_stream_handler_.from_message(context.context(), in_message);` to retrieve CUDA stream information from the input message. + * `cuda_stream_handler_.get_cuda_stream(context.context());` to obtain the CUDA stream. + * `cuda_stream_handler_.to_message(out_message);` to set the currently used CUDA stream to the output message for subsequent operators. + +Regarding your specific concern about forcing non-use of the default CUDA stream in the pool due to lack of support in CUPVA, there are two potential approaches: + +1. Ensure that your application uses an appropriate CUDA stream pool configuration. +1. Implement error handling or exceptions at the application/operator level to prevent the use of the default CUDA stream. + +It's worth noting that the `VideoReplayerOp` currently doesn't support this CUDA stream handling. Consideration is being given to supporting it alongside CUDA memory pool support in future updates. + +**Q15:I'm seeking to understand the memory allocation strategies employed within the Holoscan framework. Specifically, I'd like clarification on the following points:** + +1. **How is memory allocated across different components of Holoscan?** +1. **What is the timing of memory allocation in relation to the compute() method execution? Is memory allocated: a) Each time compute() is called, or b) Once at the beginning of execution and then reused?** +1. **What types of CUDA memory are utilized in Holoscan operations? Specifically: a) Is pinned memory used? b) Is CUDA managed memory employed? c) Do all memory exchanges remain within device (GPU) memory?** + + +A15:Memory allocation can be done either once and reused or separately on each compute call. 
It depends on how the user writes the compute method. We provide a BlockMemoryPool allocator class that allows reusing the same memory blocks on each call. Similarly, there is the ability to use CUDA streams and asynchronous memory allocation calls (CudaStreamPool). We hope to refactor over the coming months to make these easier to use than they are currently, but the capability is there now. BlockMemoryPool currently uses on-device memory only. There is an UnboundedAllocator that can allocate in one of three places: + +* system memory (i.e. C++ new/delete) +* pinned host memory (cudaMallocHost / cudaFreeHost) +* device memory (cudaMalloc / cudaFree) + + + +**Q16: I'm running the endoscopy tool tracking application with a configuration that separates compute and graphics operations onto two distinct GPUs. I have a query about the data transfer mechanism between these GPUs:** + +1. **Is there an explicit use of memcpy for transferring data from the compute GPU to the graphics GPU?** + +1. **In my analysis of the nsys profiler report, I've observed `MemcpyDToH` and `MemcpyHToD` operations. This leads me to question whether the inter-GPU data transfer is actually being routed through the host system.** + +A16: The tool tracking post process is doing device to host copies [here](https://github.com/nvidia-holoscan/holohub/blob/main/operators/tool_tracking_postprocessor/tool_tracking_postprocessor.cpp\#L113) and [here](https://github.com/nvidia-holoscan/holohub/blob/main/operators/tool_tracking_postprocessor/tool_tracking_postprocessor.cpp\#L124). These operations are also executed when the app is running on a single GPU. +Holoviz is not doing any device to host operations, neither single nor multi GPU. + +## Troubleshooting + +**Q1: How can I debug Holoscan SDK examples and tests using Visual Studio Code?** + +A1: You can use the + +``` +./run vscode +``` + +command to launch VSCode in a development container. 
Configure CMake, build the source code, and use the Run and Debug view to start debugging sessions. + +**Q2: How can I get started with debugging my Holoscan application?** + +For debugging applications in Holoscan repo, refer to the [Debugging Section](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_debugging.html). For debugging applications in Holohub, refer to HoloHub [tutorials](https://github.com/nvidia-holoscan/holohub/tree/main/tutorials/debugging) for strategies to set up debugging with Visual Studio Code or other tools such as GDB. + +**Q3: Is it possible to debug both C++ and Python components simultaneously in Holoscan SDK?** + +A3:Yes, you can use the Python C++ Debugger extension in VSCode to debug both C++ and Python components simultaneously. For more information, please refer to the Debugging [section](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_debugging.html) in the Holoscan SDK User Guide. + + +**Q4: How do I analyze a core dump file when my application crashes?** + +A4:Use the gdb command with your application and core dump file, e.g., + +``` +gdb +``` + + This will allow you to examine the stack trace and other debugging information. + +**Q5: What should I do if core dumps are not being generated?** + +A5: Enable core dumps by setting + +``` +ulimit -c unlimited +``` + + and configuring the `core_pattern value`. You may need to do this on the host system if working in a Docker container. For more information, please refer to the Debugging section in the [Holoscan User Guide](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan\_debugging.html). + +**Q6: How can I debug a distributed application using UCX?** + +A6: Set the `UCX_HANDLE_ERRORS` environment variable to control UCX's behavior during crashes. Options include printing backtraces, attaching a debugger, or freezing the application. 
For more information, please refer to the Debugging section in the [Holoscan User Guide](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_debugging.html). + +The [`UCX_LOG_LEVEL`](https://openucx.readthedocs.io/en/master/faq.html\#how-can-i-tell-which-protocols-and-transports-are-being-used-for-communication) environment variable can be set to “info” or higher level to see more detailed information about UCX transports used (the default level for UCX logging is “warn”). The full set of available UCX logging levels correspond to the list [here](https://github.com/openucx/ucx/blob/de7d3a10e4b10bbc9ec7e1a567b2d1a7514a3710/src/ucs/debug/log_def.h\#L44-L54). + +**Q7: What tools are available for profiling Python applications in Holoscan?** + +A7: You can use tools like pyinstrument, pprofile, yappi, cProfile, or line_profiler. Each has different strengths and may be more suitable depending on your specific needs. For more information , please refer to the [Python debugging section](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_debugging.html\#profiling-a-holoscan-python-application) in the Holoscan User Guide. + +Each profiler has its strengths and is suitable for different debugging scenarios, from high-level overviews to detailed line-by-line analysis.Please find below more details: + +1. pyinstrument: + * Call stack profiler that highlights performance bottlenecks + * Provides easily understandable output directly in the terminal + * Multithreading-aware, suitable for use with multithreaded schedulers + * Visualizes the execution tree, showing time spent in each function +1. pprofile: + * Line-granularity profiler + * Thread-aware and deterministic + * Provides detailed statistics for each line of code + * Shows hit counts, time per hit, and percentage of total time for each line +1. 
yappi: + * Tracing profiler that is multithreading, asyncio, and gevent aware + * Can handle complex threading scenarios in Holoscan applications + * Provides hit counts for methods across different threads + * Requires setting a context ID callback for accurate thread identification +1. cProfile: + * Deterministic profiling module built into Python + * Provides a high-level overview of function calls and time spent + * Easy to use with minimal setup + * Good for identifying which functions are taking the most time overall +1. line_profiler: + * Offers line-by-line profiling of specific functions + * Provides detailed timing information for each line within a function + * Useful for pinpointing exact lines causing performance issues + * Requires adding @profile decorators to functions of interest + +**Q8: How do I measure code coverage for my Holoscan Python application?** + +A8: You can use + +``` + Coverage.py +``` + + to measure code coverage. + +* install it with pip +* run your application with coverage +* generate reports using commands like coverage report or coverage html + + For more detailed information, please refer to [Measuring Code Coverage](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_debugging.html\#measuring-code-coverage) section in the Holoscan User Guide. + + + + +**Q9: How can I trace function calls in my Python application?** + +**A9:**You can use the trace module to track function calls. Run your application with + +``` + python -m trace --trackcalls +``` + +or use the trace module programmatically in your code. + +**Q10: How can I leverage a large language model (LLM) to assist in debugging a complex and lengthy task implemented with the Holoscan SDK?** + +A10: Holoscan SDK offers to do the task piecemeal and get feedback from you as it completes each part of the task. This allows for step-by-step debugging of complex, lengthy processes. 
For Holochat links, please refer to [Holochat-GPT](https://chatgpt.com/g/g-M6hMJimGa-holochatgpt) to ask questions and receive feedback on building and debugging Holoscan SDK applications. Note that Holochat-GPT is not confidential and may not be trained on the latest Holoscan SDK release. Please refer to the [Holoscan SDK User Guide](https://docs.nvidia.com/holoscan/sdk-user-guide/) for latest APIs. + +**Q11: How can I use Python’s \`coverage.py\` with the Holoscan SDK?** + +**A11:** In Python, both coverage measurement and the Python debugger use the [sys.settrace()](https://docs.python.org/3/library/sys.html\#sys.settrace) or [PyEval_SetTrace()](https://docs.python.org/3/c-api/init.html\#c.PyEval_SetTrace) method to register a trace method for the current thread. When [coverage.py](https://github.com/nedbat/coveragepy) or the Python debugger is running, it calls those methods (and [threading.settrace](https://docs.python.org/3/library/threading.html\#threading.settrace) for newly-created threads) for tracing Python code execution. However, when Python methods (such as compute()) are called by the threads (worker(s) of GreedyScheduler/MultiThreadScheduler) in Holoscan SDK, which are not derived from Python's main thread, [sys.settrace()](https://docs.python.org/3/library/sys.html\#sys.settrace) (or [PyEval_SetTrace()](https://docs.python.org/3/c-api/init.html\#c.PyEval_SetTrace)) is not called properly for those threads. The resolution is: + +1\. Capture the current trace method (by using sys.gettrace(); let's say `CURR_TRACE_METHOD`), if it exists, when the `Application.run()` method is called. +2\. 
When Python methods (such as Operator.compute()/Fragment.compose()/Operator.initialize()/Operator.start()/Operator.stop()) are called, get current trace method (by using sys.gettrace()) and call [sys.settrace](https://docs.python.org/3/library/sys.html\#sys.settrace)(`CURR_TRACE_METHOD`) and set current stack frame's `f_trace` to `CURR_TRACE_METHOD` (current stack frame is available through [inspect.currentframe()](https://docs.python.org/3/library/inspect.html\#inspect.currentframe)) if no trace method was set before. + +* This process can be sped up by storing thread id-\> \ map (or using thread local variable) and checking if trace method is already registered to the current thread. + + +Python's cProfile module is using [sys.setprofile()](https://github.com/python/cpython/blob/main/Lib/profile.py\#L422) instead of [sys.settrace()](https://docs.python.org/3/library/sys.html\#sys.settrace) (because the profile method is called per method, which is more effective), and we can apply similar approach for enabling profiler on Holoscan's Python Operator methods. + +**Q12:How does Holoscan SDK reconcile command line arguments for multi-fragment applications?** + +**A12:** The CLI arguments (such as \--driver, \--worker, \--fragments) are parsed by the Application class and the remaining arguments are available as app.argv property. 
+ +```py +import argparse +import sys +from holoscan.core import Application + +class MyApp(Application): + def compose(self): + pass + +if __name__ == "__main__": + app = MyApp() + + print("sys.argv:", sys.argv) + print("app.argv:", app.argv) + + parser = argparse.ArgumentParser() + parser.add_argument("--input") + args = parser.parse_args(app.argv[1:]) + print("args:", args) + + app.run() + +# $ python cli_test.py --address 12.3.0 --input a +# sys.argv: ['cli_test.py', '--address', '12.3.0', '--input', 'a'] +# app.argv: ['cli_test.py', '--input', 'a'] +# args: Namespace(input='a') +``` + +**Q13: Why is the Inference Operator rejecting the input shape for a CNN-LSTM model with a 5-dimensional input (batch, temporal_dim, channels, width, height)?** +A13: In Holoscan SDK v2.4 and earlier, the InferenceOp supports rank only between 2 and 4\. + +**Q14: I am attempting to profile a Holoscan application in a container using NVIDIA NSight Systems. I'm following the documentation available at [https://github.com/nvidia-holoscan/holohub/blob/main/doc/developer.md](https://github.com/nvidia-holoscan/holohub/blob/main/doc/developer.md) and using a recent checkout of Holohub with the Holoscan v2.1 NGC image.** + +**My process is as follows:** + +1. **Initiate the development container with NSight profiling enabled:** + +``` +./dev_container launch --nsys_profile +``` + +1. **Launch the endoscopy tool tracking application with NSight profiling:** + +``` +./run launch endoscopy_tool_tracking python --nsys_profile +``` + +**However, I encounter the following error:** + +``` +ERROR: For Nsight Systems profiling the Linux operating system's perf_event_paranoid level must be 2 or less. +See https://docs.nvidia.com/nsight-systems/InstallationGuide/index.html#linux-requirements for more information. 
+``` + +**How can I fix it?** + +A14: The + +``` + +sudo sh -c 'echo 2 >/proc/sys/kernel/perf_event_paranoid' +``` + +command needs to be executed on the host system, not inside the container. + +* Check the current value + +``` + +cat /proc/sys/kernel/perf_event_paranoid +``` + +* To temporarily change the value to 2 (which allows kernel profiling by unprivileged users): + +``` +sudo sh -c 'echo 2 >/proc/sys/kernel/perf_event_paranoid' +``` + +* To make the change permanent, edit `/etc/sysctl.conf`: Add or modify the line: + +``` +kernel.perf_event_paranoid = 2 +``` + + + +* Then apply the changes: + +``` +sudo sysctl -p +``` + + +* If you need to allow use of almost all events by all users, you can set the value to \-1 instead of 2\. +* The values and their meanings: + * \-1: Allow use of (almost) all events by all users + * 0 or higher: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN + * 1 or higher: Also disallow CPU event access by users without CAP_SYS_ADMIN + * 2 or higher: Also disallow kernel profiling by users without CAP_SYS_ADMIN +* After making changes, you may need to restart your application or services that depend on these performance events. +* Refer to the [perf_event_open manpage](https://man7.org/linux/man-pages/man2/perf_event_open.2.html) for more detailed information. + +Remember that lowering this value increases the access unprivileged users have to performance data, which could potentially be a security concern in some environments. Always consider the security implications before making such changes. Refer to the [NVIDIA Nsight Systems installation guide](https://docs.nvidia.com/nsight-systems/InstallationGuide/index.html\#requirements-for-x86-64-and-arm-sbsa-targets-on-linux) for more information. + +**Q15: I am developing an application utilizing two distinct processing graphs:** + +1. **VideoReplayerOp \-\> HolovizOp** +1. 
**CameraOp \-\> HolovizOp** + +**The first graph displays source video for camera observation, while the second applies AI processing to enhance camera input. Currently, I am employing two separate Holoviz instances, with the instance for source video (graph 1\) exclusively utilizing the second monitor (DP-4).** + +**I've encountered an issue during application termination: when pressing 'Esc' to exit, the main Holoviz instance on the primary screen closes as expected, but the source video continues playing on the secondary screen. While I can force quit the application using 'Ctrl+C', I am seeking a more elegant solution for proper termination.** + +**Is there a recommended method for gracefully closing the entire application?** + +A15:To address your concerns about graceful application termination with multiple Holoviz instances, let's first understand how the ESC key functions in Holoviz: + +1. ESC Key Behavior: + * Holoviz monitors for window closure requests via the ESC key. + * When pressed, it deactivates the associated HolovizOp by setting a boolean scheduling term to false. +1. Termination Scenarios: + * Single HolovizOp: ESC key press closes the entire application. + * Multiple HolovizOps: ESC only terminates the specific Holoviz instance, leaving others running. + +Proposed Solution: + +To achieve synchronized termination across all Holoviz instances: + +1. Create a shared boolean scheduling condition. +1. For each HolovizOp in your application: + * Set this condition as a general execution condition. + * Importantly, also set it as the `window_close_scheduling_term` parameter. 
+ +**Q16:I'm trying to use the `render_buffer_output` from Holoviz Python operator, but I get the following error :** + +``` +[error] [entity.hpp:90] Unable to find component from the name 'render_buffer_output' (error code: 24) + +``` + +A16:The reason why you are getting this error is because HolovizOp returns a gxf::VideoBuffer which is not yet supported by Python in Holoscan SDK. + +**Q17:I am using an AGX Orin with an MST board, with 2 display monitors \- DP-0.1 (touch screen) and DP-0.2 (an external main display). I am trying to force Holoviz to display on monitor DP-0.2, by setting `use_exclusive_display = True`, `display_name = "DP-0.2"`. But this results in error below:** + +``` +[info] [exclusive_window.cpp:125] ____________________ +[info] [exclusive_window.cpp:126] Available displays : +[info] [exclusive_window.cpp:129] ADA (DP-0.1) +[info] [exclusive_window.cpp:129] LG Electronics LG ULTRAWIDE (DP-0.2) +[info] [exclusive_window.cpp:134] +[info] [exclusive_window.cpp:135] Using display "LG Electronics LG ULTRAWIDE (DP-0.2)" +[info] [exclusive_window.cpp:148] X server is running, trying to acquire display +[error] [context.cpp:56] VkResult -13 - unknown +[error] [gxf_wrapper.cpp:57] Exception occurred when starting operator: 'holoviz' - Failed to acquire display from X-Server. 
+ +``` + +**As a potential fix, I have disabled the main display in nvidia-settings for the compositor, but the application still crashes with the following error:** + +``` + +[error] [context.cpp:56] /workspace/holoscan-sdk/modules/holoviz/thirdparty/nvpro_core/nvvk/swapchain_vk.cpp(172): Vulkan Error : unknown +[error] [context.cpp:56] /workspace/holoscan-sdk/modules/holoviz/thirdparty/nvpro_core/nvvk/swapchain_vk.cpp(172): Vulkan Error : unknown +[error] [context.cpp:56] /workspace/holoscan-sdk/modules/holoviz/thirdparty/nvpro_core/nvvk/swapchain_vk.cpp(172): Vulkan Error : unknown +[error] [gxf_wrapper.cpp:57] Exception occurred when starting operator: 'holoviz' - Failed to update swap chain. +``` + +A17:The progress indicated by the message "\[info\] \[exclusive_window.cpp:161\] Using display mode 1920x1080 60.000 Hz" is a positive sign. However, the failure of Vulkan swap chain creation with an unknown error is concerning. While we regularly test exclusive display with discrete GPUs (dGPUs), this is seldom tested with integrated GPUs (iGPUs). In the past, we've encountered issues with Vulkan features on iGPUs. + +An alternative option would be to use fullscreen mode, which is known to work on iGPUs. It's important to note that Holoviz always opens in fullscreen mode on the primary display, and display name selection is not currently supported. + +Given that the display they want to use for Holoviz appears to be the primary display, you could try setting `fullscreen = True` instead of `use_exclusive_display = True`. + +**Q18: How can I use CuPy arrays with Holoscan SDK’s InferenceOp?** +**A18:** Both CuPy and Holoscan SDK support `_cuda_array_interface` to facilitate seamless array integration among libraries. Refer to the [HoloHub library integration tutorial CuPy section](https://github.com/nvidia-holoscan/holohub/tree/main/tutorials/integrate_external_libs_into_pipeline\#integrate-cupy-library) for details on using CuPy arrays in a Holoscan SDK application. 
+
+**Q19: I am running into these errors when using the Holoscan Packager in my networking environment:**
+
+```
+curl: (6) Could not resolve host: github.com.
+Failed to establish a new connection:: [Errno -3] Temporary failure in name resolution...
+
+
+```
+
+A19: To resolve these errors, edit the `/etc/docker/daemon.json` file to include `dns` and `dns-search` fields as follows:
+
+```
+{
+  "default-runtime": "nvidia",
+  "runtimes": {
+    "nvidia": {
+      "args": [],
+      "path": "nvidia-container-runtime"
+    }
+  },
+  "dns": ["IP-1", "IP-n"],
+  "dns-search": ["DNS-SERVER-1", "DNS-SERVER-n"]
+}
+
+```
+
+You may need to consult your IT team and replace `IP-x` and `DNS-SERVER-x` with the provided values.
+
+## Miscellaneous
+
+**Q1: Can I use DLA cores with the Holoscan SDK?**
+
+A1: There are 2 situations in which Holoscan SDK can support Deep Learning Accelerator (DLA) cores. You can configure your application to offload certain inference tasks to DLA cores to improve performance and efficiency.
+
+* User created engine file:
+If the TensorRT engine file is created with `--useDLACore=0` and the engine file is used in the Holoscan SDK inference framework, then it will use DLA (core 0) as HoloInfer just guides the execution to TensorRT and it will automatically pick it up. The user must give the path to engine file in `model_path_map` and enable the flag `is_engine_path=true` in the inference parameter set.
+
+* If user is creating engine file via HoloInfer:
+HoloInfer currently does not support using any DLA cores for engine creation (even if it’s available) and it defaults to GPU.
+
+**Q2: How can I generate HSDK applications from Graph Composer?**
+
+A2: In Graph Composer, graph nodes (entities) are based on GXF Codelets/Components (from GXF extensions) that are registered in the Graph Composer registry. Currently, none of the Holoscan operators are registered in this registry. However, we have a method to convert Holoscan Operators into GXF extensions. 
Please find example code below:
+
+* [https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/examples/wrap_operator_as_gxf_extension](https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/examples/wrap_operator_as_gxf_extension)
+* [https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/gxf_extensions/gxf_holoscan_wrapper](https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/gxf_extensions/gxf_holoscan_wrapper)
+
+To generate or run an application graph (pipeline) described in Graph Composer, we need a method to import GXF Codelets/Components as Holoscan Operators/Resources. Currently, users need to manually wrap GXF Codelets/Resources to convert them into Holoscan Operators/Resources.
+
+**Q3: How can I support external events like React?**
+
+A3: Regarding parameter flags, we support GXF's optional/dynamic parameter flag (Figure 1\) and can set it in the `Operator::setup(OperatorSpec& spec)` method using `spec.param(..., ParameterFlag::kDynamic);` (Figure 2). However, this parameter flag is primarily for wrapping GXF Codelets. When implementing a Holoscan Native operator, there is no runtime check for parameter value changes, allowing us to update parameter values and use them without setting the parameter flag (`holoscan::Parameter` simply acts as a value holder using `std::any`).
+
+To support external events/controls (such as for Qt), we can implement a Holoscan Resource (GXF Component) that handles events and state management. This would allow a Holoscan operator to update the state based on events and use it, similar to React's state management approach. QtHoloscanVideo (Figure 3\) could be a good candidate for such a Holoscan Resource.
+
+For reference, CudaStreamPool is an example of a Holoscan Resource (wrapping the GXF CudaStreamPool component), and the object is passed to the operator as an argument (Figure 4). However, it's not necessary to pass Holoscan resources as arguments. 
Instead, we can provide them to the operator (Figure 5\) and access them through the `resource()` method inside the `compute()` method (Figure 6). In Python, resources can be accessed via the `op.resources` property (which is of type `dict[str, Resource]`).
+
+Holoscan supports Native Resources for C++ (with an example demonstrating this feature), but Native Python resources have not been implemented yet.
+
+Given this situation, it might be preferable to implement it as a GXF Component and expose it as a C++/Python Holoscan Resource. This approach would allow QtVideoOp (in Figure 3\) to define a public method for accessing the event/control Holoscan resource object from the Qt app logic (possibly through a `HoloscanVideo*` object in QtVideoOp).
+
+![](images/image3.png)
+Figure 1
+
+![](images/image4.png)
+Figure 2
+
+![](images/image5.png)
+Figure 3
+
+![](images/image6.png)
+Figure 4
+
+![](images/image7.png)
+Figure 5
+
+![](images/image8.png)
+Figure 6
+
+
+**Q5: I'm encountering difficulties implementing the `PYBIND11_OVERRIDE_PURE` mechanism in pybind11. Specifically, I'm trying to override a pure virtual C++ method with a Python subclass, but it's not working as expected. The Python subclass doesn't seem to be successfully overriding the C++ method.**
+A5: A potential fix is to keep a global reference to the Python object. Please refer to the fix provided [here](https://github.com/pybind/pybind11/issues/1333).
+
+
+This code addresses a potential issue in pybind11's handling of class inheritance and instance management. The problem arises when mocking the C++ `RoceReceiverOp` class with a Python `InstrumentedReceiverOperator` class. Here's a breakdown of the situation:
+
+1. Issue: When the Holoscan app runs, it calls methods from `RoceReceiverOp` instead of the intended `InstrumentedReceiverOperator` methods.
+1. Cause: The `InstrumentedReceiverOperator` instance seems to disappear from pybind11's internal registry during app execution. 
This occurs despite expectations that passing it to C++ should maintain its lifecycle.
+1. Debugging attempts: Efforts to track the instance deregistration (via breakpoints in pybind11's `deregister_instance()`) were unsuccessful, leaving the exact cause unclear.
+1. Workaround: To prevent the premature destruction of the `InstrumentedReceiverOperator` instance, the code maintains a global reference to it.
+
+This solution ensures the mocked class instance remains available throughout the app's lifecycle, allowing proper method overriding and execution.
+
+## Additional Resources
+
+**Q1: Where can I find additional resources and support for the Holoscan SDK?**
+
+ A1: Additional resources and support can be found on:
+
+* The NVIDIA developer page. For more information, please refer to [this](https://developer.nvidia.com/join-nvidia-developer-program?ncid=pa-srch-goog-433786&\_bt=699127533347&\_bk=nvidia%20inception&\_bm=b&\_bn=g&\_bg=161777607269\&gad\_source=1\&gclid=EAIaIQobChMI9Yrtk6PIhwMV0c\_CBB3VHgd4EAAYASAAEgJoSPD\_BwE) link.
+* Holoscan SDK GitHub repository. Please refer to [this](https://github.com/nvidia-holoscan/holoscan-sdk) link.
+* NVIDIA Developer Forums for community support and discussions. For the Holoscan SDK forum, please refer to [this](https://forums.developer.nvidia.com/c/healthcare/holoscan-sdk/320/all) link.
+
+**Q2: How can I contribute to the Holoscan SDK?**
+
+A2: The Holoscan SDK is open-source. You can contribute by:
+* Submitting pull requests for bug fixes or new features. For more detailed information on how to contribute, please refer to [this](https://github.com/nvidia-holoscan/holohub/blob/main/CONTRIBUTING.md) link. 
+* Participating in community discussions on the [Holoscan SDK forum](https://forums.developer.nvidia.com/c/healthcare/holoscan-sdk/320/all) diff --git a/docs/images/image1.png b/docs/images/image1.png new file mode 100644 index 0000000..66842a2 Binary files /dev/null and b/docs/images/image1.png differ diff --git a/docs/images/image2.png b/docs/images/image2.png new file mode 100644 index 0000000..1e43bad Binary files /dev/null and b/docs/images/image2.png differ diff --git a/docs/images/image3.png b/docs/images/image3.png new file mode 100644 index 0000000..39b93da Binary files /dev/null and b/docs/images/image3.png differ diff --git a/docs/images/image4.png b/docs/images/image4.png new file mode 100644 index 0000000..b5e43a2 Binary files /dev/null and b/docs/images/image4.png differ diff --git a/docs/images/image5.png b/docs/images/image5.png new file mode 100644 index 0000000..196327f Binary files /dev/null and b/docs/images/image5.png differ diff --git a/docs/images/image6.png b/docs/images/image6.png new file mode 100644 index 0000000..0037044 Binary files /dev/null and b/docs/images/image6.png differ diff --git a/docs/images/image7.png b/docs/images/image7.png new file mode 100644 index 0000000..e5fda15 Binary files /dev/null and b/docs/images/image7.png differ diff --git a/docs/images/image8.png b/docs/images/image8.png new file mode 100644 index 0000000..e62b1e5 Binary files /dev/null and b/docs/images/image8.png differ diff --git a/docs/index.md b/docs/index.md index 64dd270..d1a034c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -91,6 +91,13 @@ gxf_job_statistics latency_tool.rst ``` +```{toctree} +:maxdepth: 1 +:caption: FAQ + +hsdk_faq +``` + ```{toctree} :caption: Links Developer page diff --git a/docs/sdk_installation.md b/docs/sdk_installation.md index 051b4f8..d61039a 100644 --- a/docs/sdk_installation.md +++ b/docs/sdk_installation.md @@ -81,11 +81,11 @@ We provide multiple ways to install and run the Holoscan SDK: ````{tab-item} NGC Container 
- **dGPU** (x86_64, IGX Orin dGPU, Clara AGX dGPU, GH200) ```bash - docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu + docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.4.0-dgpu ``` - **iGPU** (Jetson, IGX Orin iGPU, Clara AGX iGPU) ```bash - docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-igpu + docker pull nvcr.io/nvidia/clara-holoscan/holoscan:v2.4.0-igpu ``` See details and usage instructions on [NGC][container]. ```` diff --git a/docs/visualization.md b/docs/visualization.md index 3450579..336f145 100644 --- a/docs/visualization.md +++ b/docs/visualization.md @@ -128,14 +128,14 @@ Supported formats for `nvidia::gxf::VideoBuffer`. | nvidia::gxf::VideoFormat | Supported | Description | |-|-|-| | GXF_VIDEO_FORMAT_CUSTOM | - | | -| GXF_VIDEO_FORMAT_YUV420 | - | BT.601 multi planar 4:2:0 YUV | -| GXF_VIDEO_FORMAT_YUV420_ER | - | BT.601 multi planar 4:2:0 YUV ER | -| GXF_VIDEO_FORMAT_YUV420_709 | - | BT.709 multi planar 4:2:0 YUV | -| GXF_VIDEO_FORMAT_YUV420_709_ER | - | BT.709 multi planar 4:2:0 YUV | -| GXF_VIDEO_FORMAT_NV12 | - | BT.601 multi planar 4:2:0 YUV with interleaved UV | -| GXF_VIDEO_FORMAT_NV12_ER | - | BT.601 multi planar 4:2:0 YUV ER with interleaved UV | -| GXF_VIDEO_FORMAT_NV12_709 | - | BT.709 multi planar 4:2:0 YUV with interleaved UV | -| GXF_VIDEO_FORMAT_NV12_709_ER | - | BT.709 multi planar 4:2:0 YUV ER with interleaved UV | +| GXF_VIDEO_FORMAT_YUV420 | ✓ | BT.601 multi planar 4:2:0 YUV | +| GXF_VIDEO_FORMAT_YUV420_ER | ✓ | BT.601 multi planar 4:2:0 YUV ER | +| GXF_VIDEO_FORMAT_YUV420_709 | ✓ | BT.709 multi planar 4:2:0 YUV | +| GXF_VIDEO_FORMAT_YUV420_709_ER | ✓ | BT.709 multi planar 4:2:0 YUV ER | +| GXF_VIDEO_FORMAT_NV12 | ✓ | BT.601 multi planar 4:2:0 YUV with interleaved UV | +| GXF_VIDEO_FORMAT_NV12_ER | ✓ | BT.601 multi planar 4:2:0 YUV ER with interleaved UV | +| GXF_VIDEO_FORMAT_NV12_709 | ✓ | BT.709 multi planar 4:2:0 YUV with interleaved UV | +| GXF_VIDEO_FORMAT_NV12_709_ER | ✓ | BT.709 multi planar 4:2:0 
YUV ER with interleaved UV | | GXF_VIDEO_FORMAT_RGBA | ✓ | RGBA-8-8-8-8 single plane | | GXF_VIDEO_FORMAT_BGRA | ✓ | BGRA-8-8-8-8 single plane | | GXF_VIDEO_FORMAT_ARGB | ✓ | ARGB-8-8-8-8 single plane | @@ -515,7 +515,7 @@ To use sRGB encoded images set the `fmt` parameter of {func}`viz::ImageCudaDevic {py:class}`Python `. -### Examples +### Holoviz Operator Examples There are multiple [examples](https://github.com/nvidia-holoscan/holoscan-sdk/tree/main/examples/holoviz) both in Python and C++ showing how to use various features of the Holoviz operator. @@ -578,6 +578,6 @@ Holoviz example app {ref}`namespace_holoscan__viz` -### Examples +### Holoviz Module Examples There are multiple [examples](https://github.com/nvidia-holoscan/holoscan-sdk/blob/main/modules/holoviz/examples) showing how to use various features of the Holoviz module. diff --git a/examples/aja_capture/README.md b/examples/aja_capture/README.md index 0c0872f..3eaeb5b 100644 --- a/examples/aja_capture/README.md +++ b/examples/aja_capture/README.md @@ -14,7 +14,7 @@ Minimal example to demonstrate the use of the aja source operator to capture dev ```bash ./run launch # optional: append `install` for install tree ./examples/aja_capture/cpp/aja_capture - ``` + * **source (local env)**: ```bash ${BUILD_OR_INSTALL_DIR}/examples/aja_capture/cpp/aja_capture @@ -47,3 +47,13 @@ Minimal example to demonstrate the use of the aja source operator to capture dev export PYTHONPATH=${BUILD_OR_INSTALL_DIR}/python/lib python3 ${BUILD_OR_INSTALL_DIR}/examples/aja_capture/python/aja_capture.py ``` +## Settings + + To evaluate the AJA example using alternative resolutions, you may modify the aja_capture.yaml configuration file as needed. 
For instance, to test a resolution format of 1280 x 720 at 60 Hz, you can specify the following parameters in the aja section of the configuration:
+
+  ```bash
+   aja:
+     width: 1280
+     height: 720
+     framerate: 60
+   ```
\ No newline at end of file
diff --git a/examples/aja_capture/cpp/CMakeLists.min.txt b/examples/aja_capture/cpp/CMakeLists.min.txt
new file mode 100644
index 0000000..18c8eb1
--- /dev/null
+++ b/examples/aja_capture/cpp/CMakeLists.min.txt
@@ -0,0 +1,106 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. 
+ +if(BUILD_AJA) + # Create example + add_executable(aja_capture + aja_capture.cpp + ) + + target_link_libraries(aja_capture + PRIVATE + holoscan::core + holoscan::ops::aja + holoscan::ops::holoviz + ) + + # Copy config file + add_custom_target(aja_capture_yaml + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.yaml" ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS "aja_capture.yaml" + BYPRODUCTS "aja_capture.yaml" + ) + + add_dependencies(aja_capture aja_capture_yaml) + + # Testing + if(BUILD_TESTING) + set(RECORDING_DIR ${CMAKE_CURRENT_BINARY_DIR}/recording_output) + set(SOURCE_VIDEO_BASENAME video_replayer_output) + set(VALIDATION_FRAMES_DIR ${CMAKE_SOURCE_DIR}/testing/validation_frames/aja_capture/) + + file(MAKE_DIRECTORY ${RECORDING_DIR}) + + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.yaml CONFIG_STRING) + string(REPLACE "count: -1" "count: 10" CONFIG_STRING "${CONFIG_STRING}") + set(CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/cpp_aja_capture_config.yaml) + file(WRITE ${CONFIG_FILE} "${CONFIG_STRING}") + + # Patch the current example to enable recording the rendering window + add_custom_command(OUTPUT aja_capture_test.cpp + PRE_LINK + COMMAND patch -u -o aja_capture_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.cpp + ${CMAKE_SOURCE_DIR}/testing/validation_frames/aja_capture/cpp_aja_capture.patch + ) + + # Create the test executable + add_executable(aja_capture_test + aja_capture_test.cpp + ) + + target_include_directories(aja_capture_test + PRIVATE ${CMAKE_SOURCE_DIR}/testing) + + target_compile_definitions(aja_capture_test + PRIVATE RECORD_OUTPUT RECORDING_DIR="${RECORDING_DIR}" + PRIVATE SOURCE_VIDEO_BASENAME="${SOURCE_VIDEO_BASENAME}" + ) + + target_link_libraries(aja_capture_test + PRIVATE + holoscan::core + holoscan::ops::aja + holoscan::ops::holoviz + holoscan::ops::video_stream_replayer + holoscan::ops::video_stream_recorder + holoscan::ops::format_converter + ) + + # Add the test and make sure it runs + add_test(NAME 
EXAMPLE_CPP_AJA_CAPTURE_TEST + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/aja_capture_test ${CONFIG_FILE} + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_AJA_CAPTURE_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Scheduler stopped: Some entities are waiting for execution" + ) + + # Add a test to check the validity of the frames + add_test(NAME EXAMPLE_CPP_AJA_CAPTURE_RENDER_TEST + COMMAND python3 ${CMAKE_SOURCE_DIR}/../bin/video_validation.py + --source_video_dir ${RECORDING_DIR} + --source_video_basename ${SOURCE_VIDEO_BASENAME} + --output_dir ${RECORDING_DIR} + --validation_frames_dir ${VALIDATION_FRAMES_DIR} + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + + set_tests_properties(EXAMPLE_CPP_AJA_CAPTURE_RENDER_TEST PROPERTIES + DEPENDS EXAMPLE_CPP_AJA_CAPTURE_TEST + PASS_REGULAR_EXPRESSION "Valid video output!" + ) + endif() + +endif() \ No newline at end of file diff --git a/examples/aja_capture/cpp/CMakeLists.txt b/examples/aja_capture/cpp/CMakeLists.txt index b0fc1ce..43fd05b 100644 --- a/examples/aja_capture/cpp/CMakeLists.txt +++ b/examples/aja_capture/cpp/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -47,7 +47,95 @@ install(TARGETS aja_capture DESTINATION "${app_relative_dest_path}" COMPONENT holoscan-examples ) + install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.yaml" DESTINATION ${app_relative_dest_path} COMPONENT holoscan-examples ) + +# Installing examples +if(HOLOSCAN_INSTALL_EXAMPLE_SOURCE) + # Install the source + install(FILES aja_capture.cpp aja_capture.yaml + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples + ) + + # Install the minimal CMakeLists.txt file + install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples + ) +endif() + +# Testing +if(HOLOSCAN_BUILD_TESTS AND TEST_AJA) + + set(RECORDING_DIR ${CMAKE_CURRENT_BINARY_DIR}/recording_output) + set(SOURCE_VIDEO_BASENAME video_replayer_output) + set(VALIDATION_FRAMES_DIR ${CMAKE_SOURCE_DIR}/tests/data/validation_frames/aja_capture/) + + file(MAKE_DIRECTORY ${RECORDING_DIR}) + + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.yaml CONFIG_STRING) + string(REPLACE "count: -1" "count: 10" CONFIG_STRING "${CONFIG_STRING}") + set(CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/cpp_aja_capture_config.yaml) + file(WRITE ${CONFIG_FILE} "${CONFIG_STRING}") + + # Patch the current example to enable recording the rendering window + add_custom_command(OUTPUT aja_capture_test.cpp + PRE_LINK + COMMAND patch -u -o aja_capture_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.cpp + ${CMAKE_SOURCE_DIR}/tests/data/validation_frames/aja_capture/cpp_aja_capture.patch + ) + + # Create the test executable + add_executable(aja_capture_test + aja_capture_test.cpp + ) + + target_include_directories(aja_capture_test + PRIVATE ${CMAKE_SOURCE_DIR}/tests) + + target_compile_definitions(aja_capture_test + PRIVATE RECORD_OUTPUT RECORDING_DIR="${RECORDING_DIR}" + PRIVATE SOURCE_VIDEO_BASENAME="${SOURCE_VIDEO_BASENAME}" + ) + + 
target_link_libraries(aja_capture_test + PRIVATE + holoscan::core + holoscan::ops::aja + holoscan::ops::holoviz + holoscan::ops::video_stream_replayer + holoscan::ops::video_stream_recorder + holoscan::ops::format_converter + ) + + # Add the test and make sure it runs + add_test(NAME EXAMPLE_CPP_AJA_CAPTURE_TEST + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/aja_capture_test ${CONFIG_FILE} + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_AJA_CAPTURE_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Scheduler stopped: Some entities are waiting for execution" + ) + + # Add a test to check the validity of the frames + add_test(NAME EXAMPLE_CPP_AJA_CAPTURE_RENDER_TEST + COMMAND python3 ${CMAKE_SOURCE_DIR}/scripts/video_validation.py + --source_video_dir ${RECORDING_DIR} + --source_video_basename ${SOURCE_VIDEO_BASENAME} + --output_dir ${RECORDING_DIR} + --validation_frames_dir ${VALIDATION_FRAMES_DIR} + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + + set_tests_properties(EXAMPLE_CPP_AJA_CAPTURE_RENDER_TEST PROPERTIES + DEPENDS EXAMPLE_CPP_AJA_CAPTURE_TEST + PASS_REGULAR_EXPRESSION "Valid video output!" + ) + +endif() + diff --git a/examples/aja_capture/cpp/aja_capture.cpp b/examples/aja_capture/cpp/aja_capture.cpp index 7b32c2b..3f4c786 100644 --- a/examples/aja_capture/cpp/aja_capture.cpp +++ b/examples/aja_capture/cpp/aja_capture.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,7 +24,8 @@ class App : public holoscan::Application { void compose() override { using namespace holoscan; - auto source = make_operator("aja", from_config("aja")); + auto source = make_operator("aja", from_config("aja"), + make_condition(from_config("aja.count"))); auto visualizer = make_operator("holoviz", from_config("holoviz")); // Flow definition @@ -38,6 +39,8 @@ int main(int argc, char** argv) { // Get the configuration auto config_path = std::filesystem::canonical(argv[0]).parent_path(); config_path /= std::filesystem::path("aja_capture.yaml"); + if (argc >= 2) { config_path = argv[1]; } + app.config(config_path); app.run(); diff --git a/examples/aja_capture/cpp/aja_capture.yaml b/examples/aja_capture/cpp/aja_capture.yaml index c901c11..2836180 100644 --- a/examples/aja_capture/cpp/aja_capture.yaml +++ b/examples/aja_capture/cpp/aja_capture.yaml @@ -1,5 +1,5 @@ %YAML 1.2 -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,9 +17,11 @@ aja: width: 1920 height: 1080 + framerate: 60 rdma: true enable_overlay: false overlay_rdma: true + count: -1 holoviz: width: 854 diff --git a/examples/aja_capture/python/CMakeLists.min.txt b/examples/aja_capture/python/CMakeLists.min.txt new file mode 100644 index 0000000..299fafd --- /dev/null +++ b/examples/aja_capture/python/CMakeLists.min.txt @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Testing +if(BUILD_TESTING AND BUILD_AJA) + + set(RECORDING_DIR ${CMAKE_CURRENT_BINARY_DIR}/recording_output) + set(SOURCE_VIDEO_BASENAME python_aja_capture_output) + set(VALIDATION_FRAMES_DIR ${CMAKE_SOURCE_DIR}/testing/validation_frames/aja_capture/) + + file(MAKE_DIRECTORY ${RECORDING_DIR}) + + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.yaml CONFIG_STRING) + string(REPLACE "count: -1" "count: 10" CONFIG_STRING ${CONFIG_STRING}) + string(APPEND CONFIG_STRING " enable_render_buffer_output: true\n\nrecorder:\n directory: \"${RECORDING_DIR}\"\n basename: \"${SOURCE_VIDEO_BASENAME}\"") + set(CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/python_aja_capture_config.yaml) + file(WRITE ${CONFIG_FILE} ${CONFIG_STRING}) + + # Patch the current example to enable recording the rendering window + add_custom_command(OUTPUT aja_capture_test.py + PRE_LINK + COMMAND patch -u -o aja_capture_test.py ${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.py + ${CMAKE_SOURCE_DIR}/testing/validation_frames/aja_capture/python_aja_capture.patch + ) + + add_custom_target(python_aja_capture_test ALL + DEPENDS "aja_capture_test.py" + ) + + add_test(NAME EXAMPLE_PYTHON_AJA_CAPTURE_TEST + COMMAND python3 aja_capture_test.py --config python_aja_capture_config.yaml + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + + set_tests_properties(EXAMPLE_PYTHON_AJA_CAPTURE_TEST PROPERTIES + DEPENDS "aja_capture_test.py" + PASS_REGULAR_EXPRESSION "Scheduler stopped: Some entities are waiting for execution" + ) + + # Add a test to check the validity of the frames + add_test(NAME 
EXAMPLE_PYTHON_AJA_CAPTURE_RENDER_TEST + COMMAND python3 ${CMAKE_SOURCE_DIR}/../bin/video_validation.py + --source_video_dir ${RECORDING_DIR} + --source_video_basename ${SOURCE_VIDEO_BASENAME} + --output_dir ${RECORDING_DIR} + --validation_frames_dir ${VALIDATION_FRAMES_DIR} + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + + set_tests_properties(EXAMPLE_PYTHON_AJA_CAPTURE_RENDER_TEST PROPERTIES + DEPENDS EXAMPLE_PYTHON_AJA_CAPTURE_TEST + PASS_REGULAR_EXPRESSION "Valid video output!" + ) + +endif() \ No newline at end of file diff --git a/examples/aja_capture/python/CMakeLists.txt b/examples/aja_capture/python/CMakeLists.txt index 4c79909..4bb4149 100644 --- a/examples/aja_capture/python/CMakeLists.txt +++ b/examples/aja_capture/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,6 +26,67 @@ add_custom_target(python_aja_capture ALL # Install the app install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.py" + "${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.yaml" DESTINATION "${app_relative_dest_path}" COMPONENT "holoscan-examples" ) + +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + +# Testing +if(HOLOSCAN_BUILD_TESTS AND TEST_AJA) + + set(RECORDING_DIR ${CMAKE_CURRENT_BINARY_DIR}/recording_output) + set(SOURCE_VIDEO_BASENAME python_aja_capture_output) + set(VALIDATION_FRAMES_DIR ${CMAKE_SOURCE_DIR}/tests/data/validation_frames/aja_capture/) + + file(MAKE_DIRECTORY ${RECORDING_DIR}) + + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.yaml CONFIG_STRING) + string(REPLACE "count: -1" "count: 10" CONFIG_STRING ${CONFIG_STRING}) + string(APPEND 
CONFIG_STRING " enable_render_buffer_output: true\n\nrecorder:\n directory: \"${RECORDING_DIR}\"\n basename: \"${SOURCE_VIDEO_BASENAME}\"") + set(CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/python_aja_capture_config.yaml) + file(WRITE ${CONFIG_FILE} ${CONFIG_STRING}) + + # Patch the current example to enable recording the rendering window + add_custom_command(OUTPUT aja_capture_test.py + PRE_LINK + COMMAND patch -u -o aja_capture_test.py ${CMAKE_CURRENT_SOURCE_DIR}/aja_capture.py + ${CMAKE_SOURCE_DIR}/tests/data/validation_frames/aja_capture/python_aja_capture.patch + ) + + add_custom_target(python_aja_capture_test ALL + DEPENDS "aja_capture_test.py" + ) + + add_test(NAME EXAMPLE_PYTHON_AJA_CAPTURE_TEST + COMMAND python3 aja_capture_test.py --config python_aja_capture_config.yaml + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + + set_tests_properties(EXAMPLE_PYTHON_AJA_CAPTURE_TEST PROPERTIES + DEPENDS "aja_capture_test.py" + PASS_REGULAR_EXPRESSION "Scheduler stopped: Some entities are waiting for execution" + ) + + # Add a test to check the validity of the frames + add_test(NAME EXAMPLE_PYTHON_AJA_CAPTURE_RENDER_TEST + COMMAND python3 ${CMAKE_SOURCE_DIR}/scripts/video_validation.py + --source_video_dir ${RECORDING_DIR} + --source_video_basename ${SOURCE_VIDEO_BASENAME} + --output_dir ${RECORDING_DIR} + --validation_frames_dir ${VALIDATION_FRAMES_DIR} + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + + set_tests_properties(EXAMPLE_PYTHON_AJA_CAPTURE_RENDER_TEST PROPERTIES + DEPENDS EXAMPLE_PYTHON_AJA_CAPTURE_TEST + PASS_REGULAR_EXPRESSION "Valid video output!" + ) + +endif() diff --git a/examples/aja_capture/python/aja_capture.py b/examples/aja_capture/python/aja_capture.py index 13cd689..99afa2a 100644 --- a/examples/aja_capture/python/aja_capture.py +++ b/examples/aja_capture/python/aja_capture.py @@ -15,6 +15,9 @@ limitations under the License. 
""" # noqa: E501 +import os + +from holoscan.conditions import CountCondition from holoscan.core import Application from holoscan.operators import AJASourceOp, HolovizOp @@ -31,34 +34,25 @@ class AJACaptureApp(Application): """ def compose(self): - width = 1920 - height = 1080 - - source = AJASourceOp( - self, - name="aja", - width=width, - height=height, - rdma=True, - enable_overlay=False, - overlay_rdma=True, - ) - - visualizer = HolovizOp( - self, - name="holoviz", - width=width, - height=height, - tensors=[{"name": "", "type": "color", "opacity": 1.0, "priority": 0}], - ) + args_aja = self.kwargs("aja") + + count = args_aja["count"] + args_aja.pop("count") + + source = AJASourceOp(self, CountCondition(self, count), name="aja", **args_aja) + + visualizer = HolovizOp(self, name="holoviz", **self.kwargs("holoviz")) self.add_flow(source, visualizer, {("video_buffer_output", "receivers")}) -def main(): +def main(config_file): app = AJACaptureApp() + # if the --config command line argument was provided, it will override this config_file + app.config(config_file) app.run() if __name__ == "__main__": - main() + config_file = os.path.join(os.path.dirname(__file__), "aja_capture.yaml") + main(config_file=config_file) diff --git a/examples/aja_capture/python/aja_capture.yaml b/examples/aja_capture/python/aja_capture.yaml new file mode 100644 index 0000000..2836180 --- /dev/null +++ b/examples/aja_capture/python/aja_capture.yaml @@ -0,0 +1,33 @@ +%YAML 1.2 +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +aja: + width: 1920 + height: 1080 + framerate: 60 + rdma: true + enable_overlay: false + overlay_rdma: true + count: -1 + +holoviz: + width: 854 + height: 480 + tensors: + - name: "" + type: color + opacity: 1.0 + priority: 0 diff --git a/examples/aja_capture/python/aja_capture_59Hz.yaml b/examples/aja_capture/python/aja_capture_59Hz.yaml new file mode 100644 index 0000000..1de3b80 --- /dev/null +++ b/examples/aja_capture/python/aja_capture_59Hz.yaml @@ -0,0 +1,33 @@ +%YAML 1.2 +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +aja: + width: 1280 + height: 720 + rdma: true + enable_overlay: false + overlay_rdma: true + framerate: 59 + count: -1 + +holoviz: + width: 854 + height: 480 + tensors: + - name: "" + type: color + opacity: 1.0 + priority: 0 diff --git a/examples/aja_capture/python/aja_capture_60Hz.yaml b/examples/aja_capture/python/aja_capture_60Hz.yaml new file mode 100644 index 0000000..e8220c4 --- /dev/null +++ b/examples/aja_capture/python/aja_capture_60Hz.yaml @@ -0,0 +1,33 @@ +%YAML 1.2 +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +aja: + width: 1280 + height: 720 + rdma: true + enable_overlay: false + overlay_rdma: true + framerate: 60 + count: -1 + +holoviz: + width: 854 + height: 480 + tensors: + - name: "" + type: color + opacity: 1.0 + priority: 0 diff --git a/examples/bring_your_own_model/python/CMakeLists.min.txt b/examples/bring_your_own_model/python/CMakeLists.min.txt index 51c264d..3e0a815 100644 --- a/examples/bring_your_own_model/python/CMakeLists.min.txt +++ b/examples/bring_your_own_model/python/CMakeLists.min.txt @@ -14,7 +14,7 @@ # limitations under the License. 
# Testing -if(HOLOSCAN_BUILD_TESTS) +if(BUILD_TESTING) file(READ ${CMAKE_CURRENT_SOURCE_DIR}/byom.yaml CONFIG_STRING) string(REPLACE "count: 0" "count: 10" CONFIG_STRING ${CONFIG_STRING}) @@ -22,8 +22,8 @@ if(HOLOSCAN_BUILD_TESTS) file(WRITE ${CONFIG_FILE} ${CONFIG_STRING}) add_test(NAME EXAMPLE_PYTHON_BYOM_TEST - COMMAND python3 byom.py --config python_byom_config.yaml - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND python3 byom.py --config ${CMAKE_CURRENT_BINARY_DIR}/python_byom_config.yaml + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) set_tests_properties(EXAMPLE_PYTHON_BYOM_TEST PROPERTIES diff --git a/examples/conditions/asynchronous/python/CMakeLists.min.txt b/examples/conditions/asynchronous/python/CMakeLists.min.txt new file mode 100644 index 0000000..1c99b31 --- /dev/null +++ b/examples/conditions/asynchronous/python/CMakeLists.min.txt @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_PYTHON_PING_ASYNC_TEST + COMMAND python3 ping_async.py --delay=100 --count=5 + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_PING_ASYNC_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Rx message value: 5" + PASS_REGULAR_EXPRESSION "waiting for 0.1 s in AsyncPingTxOp.async_send" + PASS_REGULAR_EXPRESSION "waiting for 0.1 s in AsyncPingRxOp.async_receive" + ) +endif() diff --git a/examples/conditions/asynchronous/python/CMakeLists.txt b/examples/conditions/asynchronous/python/CMakeLists.txt index aac597f..9cfc6db 100644 --- a/examples/conditions/asynchronous/python/CMakeLists.txt +++ b/examples/conditions/asynchronous/python/CMakeLists.txt @@ -30,6 +30,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_PYTHON_PING_ASYNC_TEST diff --git a/examples/flow_tracker/python/CMakeLists.min.txt b/examples/flow_tracker/python/CMakeLists.min.txt new file mode 100644 index 0000000..74a7295 --- /dev/null +++ b/examples/flow_tracker/python/CMakeLists.min.txt @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_PYTHON_FLOW_TRACKER_TEST + COMMAND python3 flow_tracker.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_FLOW_TRACKER_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Data Flow Tracking Results") +endif() diff --git a/examples/flow_tracker/python/CMakeLists.txt b/examples/flow_tracker/python/CMakeLists.txt index fef2518..d9377ce 100644 --- a/examples/flow_tracker/python/CMakeLists.txt +++ b/examples/flow_tracker/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,6 +30,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_PYTHON_FLOW_TRACKER_TEST diff --git a/examples/hello_world/python/CMakeLists.min.txt b/examples/hello_world/python/CMakeLists.min.txt new file mode 100644 index 0000000..05f357e --- /dev/null +++ b/examples/hello_world/python/CMakeLists.min.txt @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_PYTHON_HELLO_WORLD_TEST + COMMAND python3 hello_world.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_HELLO_WORLD_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Hello World!" + ) +endif() diff --git a/examples/hello_world/python/CMakeLists.txt b/examples/hello_world/python/CMakeLists.txt index 08bfe77..c201316 100644 --- a/examples/hello_world/python/CMakeLists.txt +++ b/examples/hello_world/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,6 +30,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_PYTHON_HELLO_WORLD_TEST diff --git a/examples/holoviz/cpp/CMakeLists.min.txt b/examples/holoviz/cpp/CMakeLists.min.txt index 98505a4..b75d42a 100644 --- a/examples/holoviz/cpp/CMakeLists.min.txt +++ b/examples/holoviz/cpp/CMakeLists.min.txt @@ -102,4 +102,14 @@ if(BUILD_TESTING) DEPENDS EXAMPLE_CPP_HOLOVIZ_GEOMETRY_TEST PASS_REGULAR_EXPRESSION "Valid video output!" 
) + + # Add the camera example and make sure it runs + add_test(NAME EXAMPLE_CPP_HOLOVIZ_CAMERA_TEST + COMMAND ${CMAKE_CURRENT_BINARY_DIR}/holoviz_camera --count 120 + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_HOLOVIZ_CAMERA_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Received camera pose:" + PASS_REGULAR_EXPRESSION "Scheduler stopped: Some entities are waiting for execution, but there are no periodic or async entities to get out of the deadlock." + ) endif() diff --git a/examples/multi_branch_pipeline/python/CMakeLists.min.txt b/examples/multi_branch_pipeline/python/CMakeLists.min.txt new file mode 100644 index 0000000..230741b --- /dev/null +++ b/examples/multi_branch_pipeline/python/CMakeLists.min.txt @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# Testing
+if(BUILD_TESTING)
+  add_test(NAME EXAMPLE_PYTHON_MULTI_BRANCH_TEST
+    COMMAND python3 multi_branch_pipeline.py
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  )
+  # rx2 should receive close to 100 messages (values 0 - 99)
+  # rx1 will receive only some of these, but it is hard to know exactly which ones so just verify the first
+  set_tests_properties(EXAMPLE_PYTHON_MULTI_BRANCH_TEST PROPERTIES
+    PASS_REGULAR_EXPRESSION "receiver 'rx2' received value: 90"
+    PASS_REGULAR_EXPRESSION "receiver 'rx1' received value: 0")
+
+  add_test(NAME EXAMPLE_PYTHON_MULTI_BRANCH_EVENT_BASED_TEST
+    COMMAND python3 multi_branch_pipeline.py --event_based
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  )
+  # rx2 should receive close to 100 messages (values 0 - 99)
+  # rx1 will receive only some of these, but it is hard to know exactly which ones so just verify the first
+  set_tests_properties(EXAMPLE_PYTHON_MULTI_BRANCH_EVENT_BASED_TEST PROPERTIES
+    PASS_REGULAR_EXPRESSION "receiver 'rx2' received value: 90"
+    PASS_REGULAR_EXPRESSION "receiver 'rx1' received value: 0")
+endif()
diff --git a/examples/multi_branch_pipeline/python/CMakeLists.txt b/examples/multi_branch_pipeline/python/CMakeLists.txt
index 3534a68..5f7d945 100644
--- a/examples/multi_branch_pipeline/python/CMakeLists.txt
+++ b/examples/multi_branch_pipeline/python/CMakeLists.txt
@@ -30,6 +30,13 @@ install(FILES
   COMPONENT "holoscan-examples"
 )
 
+# Install the minimal CMakeLists.txt file
+install(FILES CMakeLists.min.txt
+  RENAME "CMakeLists.txt"
+  DESTINATION "${app_relative_dest_path}"
+  COMPONENT holoscan-examples
+)
+
 # Testing
 if(HOLOSCAN_BUILD_TESTS)
   add_test(NAME EXAMPLE_PYTHON_MULTI_BRANCH_TEST
diff --git a/examples/multithread/python/CMakeLists.min.txt b/examples/multithread/python/CMakeLists.min.txt
new file mode 100644
index 0000000..e8cf2a2
--- /dev/null
+++ b/examples/multithread/python/CMakeLists.min.txt
@@ -0,0 +1,31 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the \"License\");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an \"AS IS\" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Testing
+if(BUILD_TESTING)
+  add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_TEST
+    COMMAND python3 multithread.py
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  )
+  set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_TEST PROPERTIES
+    PASS_REGULAR_EXPRESSION "sum of received values: 496")
+
+  add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_EVENT_BASED_TEST
+    COMMAND python3 multithread.py --event_based
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  )
+  set_tests_properties(EXAMPLE_PYTHON_MULTITHREAD_EVENT_BASED_TEST PROPERTIES
+    PASS_REGULAR_EXPRESSION "sum of received values: 496")
+endif()
diff --git a/examples/multithread/python/CMakeLists.txt b/examples/multithread/python/CMakeLists.txt
index 92a6229..cdef557 100644
--- a/examples/multithread/python/CMakeLists.txt
+++ b/examples/multithread/python/CMakeLists.txt
@@ -30,6 +30,13 @@ install(FILES
   COMPONENT "holoscan-examples"
 )
 
+# Install the minimal CMakeLists.txt file
+install(FILES CMakeLists.min.txt
+  RENAME "CMakeLists.txt"
+  DESTINATION "${app_relative_dest_path}"
+  COMPONENT holoscan-examples
+)
+
 # Testing
 if(HOLOSCAN_BUILD_TESTS)
   add_test(NAME EXAMPLE_PYTHON_MULTITHREAD_TEST
diff --git a/examples/numpy_native/CMakeLists.min.txt b/examples/numpy_native/CMakeLists.min.txt
new file mode 100644
index 0000000..8084795
--- /dev/null
+++ b/examples/numpy_native/CMakeLists.min.txt
@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText:
Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_PYTHON_CONVOLVE_TEST + COMMAND python3 convolve.py + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_CONVOLVE_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "\[1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\]") +endif() diff --git a/examples/numpy_native/CMakeLists.txt b/examples/numpy_native/CMakeLists.txt index 5975eaf..e900896 100644 --- a/examples/numpy_native/CMakeLists.txt +++ b/examples/numpy_native/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,6 +31,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_PYTHON_CONVOLVE_TEST diff --git a/examples/ping_custom_op/python/CMakeLists.min.txt b/examples/ping_custom_op/python/CMakeLists.min.txt new file mode 100644 index 0000000..25f1b84 --- /dev/null +++ b/examples/ping_custom_op/python/CMakeLists.min.txt @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_PYTHON_PING_CUSTOM_OP_TEST + COMMAND python3 ping_custom_op.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_PING_CUSTOM_OP_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Middle message value: 10" + PASS_REGULAR_EXPRESSION "Rx message value: 30" + ) +endif() diff --git a/examples/ping_custom_op/python/CMakeLists.txt b/examples/ping_custom_op/python/CMakeLists.txt index 1a839d5..1dd6090 100644 --- a/examples/ping_custom_op/python/CMakeLists.txt +++ b/examples/ping_custom_op/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,6 +30,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_PYTHON_PING_CUSTOM_OP_TEST diff --git a/examples/ping_distributed/README.md b/examples/ping_distributed/README.md index 94b55ef..5e07f85 100644 --- a/examples/ping_distributed/README.md +++ b/examples/ping_distributed/README.md @@ -11,6 +11,15 @@ The `--gpu` command line argument can be provided to indicate that the tensor sh *Visit the [SDK User Guide](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_create_distributed_app.html) to learn more about distributed applications.* +#### Note on error logged by the application +Note that it is currently expected that this application logs the following error during shutdown + +```text +[error] [ucx_context.cpp:466] Connection dropped with status -25 (Connection reset by remote peer) +``` + +This will be logged 
by the worker that is running "fragment2" after "fragment1" has sent all messages. It is caused by fragment 1 starting to shutdown after its last message has been sent, resulting in severing of connections from fragment 2 receivers to fragment 1 transmitters. + ## C++ Run instructions Please refer to the [user guide](https://docs.nvidia.com/holoscan/sdk-user-guide/holoscan_create_distributed_app.html#building-and-running-a-distributed-application) for instructions on how to run the application in a distributed manner. diff --git a/examples/ping_distributed/cpp/CMakeLists.min.txt b/examples/ping_distributed/cpp/CMakeLists.min.txt index 823ea43..d4f6d15 100644 --- a/examples/ping_distributed/cpp/CMakeLists.min.txt +++ b/examples/ping_distributed/cpp/CMakeLists.min.txt @@ -22,14 +22,13 @@ find_package(holoscan REQUIRED CONFIG add_executable(ping_distributed ping_distributed.cpp - ping_distributed_ops.cpp ) target_link_libraries(ping_distributed PRIVATE holoscan::core - holoscan::ops::ping_tx - holoscan::ops::ping_rx + holoscan::ops::ping_tensor_tx + holoscan::ops::ping_tensor_rx ) # Testing @@ -39,7 +38,7 @@ if(HOLOSCAN_BUILD_TESTS) WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) set_tests_properties(EXAMPLE_CPP_PING_DISTRIBUTED_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "message 10: Tensor key: 'out', shape: \\(32, 64\\)" + PASS_REGULAR_EXPRESSION "rx received message 10: Tensor key: 'tensor', shape: \\(32, 64\\)" ) add_test(NAME EXAMPLE_CPP_PING_DISTRIBUTED_GPU_TEST @@ -47,7 +46,7 @@ if(HOLOSCAN_BUILD_TESTS) WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) set_tests_properties(EXAMPLE_CPP_PING_DISTRIBUTED_GPU_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "message 10: Tensor key: 'out', shape: \\(32, 64\\)" + PASS_REGULAR_EXPRESSION "rx received message 10: Tensor key: 'tensor', shape: \\(32, 64\\)" ) add_test(NAME EXAMPLE_CPP_PING_DISTRIBUTED_HELP_STRING_TEST diff --git a/examples/ping_distributed/cpp/CMakeLists.txt b/examples/ping_distributed/cpp/CMakeLists.txt index 3fd0885..f50f5fa 
100644 --- a/examples/ping_distributed/cpp/CMakeLists.txt +++ b/examples/ping_distributed/cpp/CMakeLists.txt @@ -16,13 +16,12 @@ # Create examples add_executable(ping_distributed ping_distributed.cpp - ping_distributed_ops.cpp ) target_link_libraries(ping_distributed PUBLIC holoscan::core - holoscan::ops::ping_tx - holoscan::ops::ping_rx + holoscan::ops::ping_tensor_tx + holoscan::ops::ping_tensor_rx ) # Install examples @@ -40,8 +39,6 @@ if(HOLOSCAN_INSTALL_EXAMPLE_SOURCE) install( FILES ping_distributed.cpp - ping_distributed_ops.cpp - ping_distributed_ops.hpp DESTINATION "${app_relative_dest_path}" COMPONENT holoscan-examples ) @@ -67,7 +64,7 @@ if(HOLOSCAN_BUILD_TESTS) WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) set_tests_properties(EXAMPLE_CPP_PING_DISTRIBUTED_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "message 10: Tensor key: 'out', shape: \\(32, 64\\)" + PASS_REGULAR_EXPRESSION "rx received message 10: Tensor key: 'tensor', shape: \\(32, 64\\)" FAIL_REGULAR_EXPRESSION "initialized independent of a parent entity" ) @@ -76,7 +73,7 @@ if(HOLOSCAN_BUILD_TESTS) WORKING_DIRECTORY ${CMAKE_BINARY_DIR} ) set_tests_properties(EXAMPLE_CPP_PING_DISTRIBUTED_GPU_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "message 10: Tensor key: 'out', shape: \\(32, 64\\)" + PASS_REGULAR_EXPRESSION "rx received message 10: Tensor key: 'tensor', shape: \\(32, 64\\)" FAIL_REGULAR_EXPRESSION "initialized independent of a parent entity" ) diff --git a/examples/ping_distributed/cpp/ping_distributed.cpp b/examples/ping_distributed/cpp/ping_distributed.cpp index 0e4e7ab..6529ac9 100644 --- a/examples/ping_distributed/cpp/ping_distributed.cpp +++ b/examples/ping_distributed/cpp/ping_distributed.cpp @@ -20,8 +20,8 @@ #include #include - -#include "ping_distributed_ops.hpp" +#include +#include class Fragment1 : public holoscan::Fragment { public: @@ -37,14 +37,16 @@ class Fragment1 : public holoscan::Fragment { void compose() override { using namespace holoscan; - auto tx = make_operator("tx", - 
make_condition(count_), - Arg("tensor_on_gpu", gpu_tensor_), - Arg("batch_size", batch_size_), - Arg("rows", rows_), - Arg("columns", columns_), - Arg("channels", channels_), - Arg("data_type", data_type_)); + + auto tx = make_operator( + "tx", + make_condition(count_), + Arg("storage_type", std::string{gpu_tensor_ ? "device" : "system"}), + Arg("batch_size", batch_size_), + Arg("rows", rows_), + Arg("columns", columns_), + Arg("channels", channels_), + Arg("data_type", data_type_)); add_operator(tx); } diff --git a/examples/ping_distributed/cpp/ping_distributed_ops.hpp b/examples/ping_distributed/cpp/ping_distributed_ops.hpp deleted file mode 100644 index 14d2712..0000000 --- a/examples/ping_distributed/cpp/ping_distributed_ops.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include - -#include - -#include "gxf/std/allocator.hpp" - -namespace holoscan::ops { - -class PingTensorTxOp : public holoscan::Operator { - public: - HOLOSCAN_OPERATOR_FORWARD_ARGS(PingTensorTxOp) - - PingTensorTxOp() = default; - - void initialize() override; - void setup(OperatorSpec& spec) override; - void compute(InputContext&, OutputContext& op_output, ExecutionContext& context) override; - - nvidia::gxf::PrimitiveType element_type() { - if (element_type_.has_value()) { return element_type_.value(); } - element_type_ = primitive_type(data_type_.get()); - return element_type_.value(); - } - - private: - nvidia::gxf::PrimitiveType primitive_type(const std::string& data_type); - std::optional element_type_; - - Parameter> allocator_; - Parameter tensor_on_gpu_; - Parameter batch_size_; - Parameter rows_; - Parameter columns_; - Parameter channels_; - Parameter data_type_; - Parameter tensor_name_; - - size_t count_{1}; -}; - -class PingTensorRxOp : public holoscan::Operator { - public: - HOLOSCAN_OPERATOR_FORWARD_ARGS(PingTensorRxOp) - - PingTensorRxOp() = default; - - void setup(OperatorSpec& spec) override; - void compute(InputContext& op_input, OutputContext&, ExecutionContext&) override; - - private: - size_t count_{1}; -}; -} // namespace holoscan::ops diff --git a/examples/ping_distributed/python/CMakeLists.min.txt b/examples/ping_distributed/python/CMakeLists.min.txt new file mode 100644 index 0000000..5004b58 --- /dev/null +++ b/examples/ping_distributed/python/CMakeLists.min.txt @@ -0,0 +1,62 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_PYTHON_PING_DISTRIBUTED_TEST + COMMAND python3 ping_distributed.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_PING_DISTRIBUTED_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "message 10: Tensor key: 'out', shape: \\(32, 64\\)" + FAIL_REGULAR_EXPRESSION "AssertionError:" + FAIL_REGULAR_EXPRESSION "initialized independent of a parent entity" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) + + add_test(NAME EXAMPLE_PYTHON_PING_DISTRIBUTED_GPU_TEST + COMMAND python3 ping_distributed.py --gpu + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_PING_DISTRIBUTED_GPU_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "message 10: Tensor key: 'out', shape: \\(32, 64\\)" + FAIL_REGULAR_EXPRESSION "AssertionError:" + FAIL_REGULAR_EXPRESSION "initialized independent of a parent entity" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) + + add_test(NAME EXAMPLE_PYTHON_PING_DISTRIBUTED_HELP_STRING_TEST + COMMAND python3 ping_distributed.py --help + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_PING_DISTRIBUTED_HELP_STRING_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Usage: ping_distributed.py \\[OPTIONS\\]" + FAIL_REGULAR_EXPRESSION "initialized independent of a parent entity" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) + + add_test(NAME EXAMPLE_PYTHON_PING_DISTRIBUTED_INVALID_DEVICE_TEST + COMMAND python3 ping_distributed.py + WORKING_DIRECTORY 
${CMAKE_CURRENT_SOURCE_DIR} + ) + # Use a negative id to test with a non-existent device + set_tests_properties(EXAMPLE_PYTHON_PING_DISTRIBUTED_INVALID_DEVICE_TEST PROPERTIES + ENVIRONMENT "HOLOSCAN_UCX_DEVICE_ID=-5" + PASS_REGULAR_EXPRESSION "GPUDevice value found and cached. dev_id: -5" + PASS_REGULAR_EXPRESSION "cudaSetDevice Failed - 101, device id -5" + FAIL_REGULAR_EXPRESSION "initialized independent of a parent entity" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" + ) +endif() diff --git a/examples/ping_distributed/python/CMakeLists.txt b/examples/ping_distributed/python/CMakeLists.txt index f9efcd9..122887a 100644 --- a/examples/ping_distributed/python/CMakeLists.txt +++ b/examples/ping_distributed/python/CMakeLists.txt @@ -30,6 +30,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_PYTHON_PING_DISTRIBUTED_TEST @@ -37,9 +44,10 @@ if(HOLOSCAN_BUILD_TESTS) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) set_tests_properties(EXAMPLE_PYTHON_PING_DISTRIBUTED_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "message 10: Tensor key: 'out', shape: \\(32, 64\\)" + PASS_REGULAR_EXPRESSION "rx received message 10: Tensor key: 'out', shape: \\(32, 64\\)" FAIL_REGULAR_EXPRESSION "AssertionError:" FAIL_REGULAR_EXPRESSION "initialized independent of a parent entity" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" ) add_test(NAME EXAMPLE_PYTHON_PING_DISTRIBUTED_GPU_TEST @@ -47,9 +55,10 @@ if(HOLOSCAN_BUILD_TESTS) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) set_tests_properties(EXAMPLE_PYTHON_PING_DISTRIBUTED_GPU_TEST PROPERTIES - PASS_REGULAR_EXPRESSION "message 10: Tensor key: 'out', shape: \\(32, 64\\)" + PASS_REGULAR_EXPRESSION "rx received message 10: Tensor key: 'out', shape: \\(32, 64\\)" 
FAIL_REGULAR_EXPRESSION "AssertionError:" FAIL_REGULAR_EXPRESSION "initialized independent of a parent entity" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" ) add_test(NAME EXAMPLE_PYTHON_PING_DISTRIBUTED_HELP_STRING_TEST @@ -59,6 +68,7 @@ if(HOLOSCAN_BUILD_TESTS) set_tests_properties(EXAMPLE_PYTHON_PING_DISTRIBUTED_HELP_STRING_TEST PROPERTIES PASS_REGULAR_EXPRESSION "Usage: ping_distributed.py \\[OPTIONS\\]" FAIL_REGULAR_EXPRESSION "initialized independent of a parent entity" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" ) add_test(NAME EXAMPLE_PYTHON_PING_DISTRIBUTED_INVALID_DEVICE_TEST @@ -71,5 +81,6 @@ if(HOLOSCAN_BUILD_TESTS) PASS_REGULAR_EXPRESSION "GPUDevice value found and cached. dev_id: -5" PASS_REGULAR_EXPRESSION "cudaSetDevice Failed - 101, device id -5" FAIL_REGULAR_EXPRESSION "initialized independent of a parent entity" + FAIL_REGULAR_EXPRESSION "Unable to convert argument type" ) endif() diff --git a/examples/ping_distributed/python/ping_distributed.py b/examples/ping_distributed/python/ping_distributed.py index 61a3c20..5ceea8f 100644 --- a/examples/ping_distributed/python/ping_distributed.py +++ b/examples/ping_distributed/python/ping_distributed.py @@ -18,115 +18,11 @@ import textwrap from argparse import ArgumentParser -import cupy as cp import numpy as np from holoscan.conditions import CountCondition -from holoscan.core import Application, Fragment, Operator, OperatorSpec - -# define transmit/receive operators using tensors - - -def data_type_to_numpy_dtype(data_type): - if data_type == "uint8_t": - return np.uint8 - elif data_type == "uint16_t": - return np.uint16 - elif data_type == "uint32_t": - return np.uint32 - elif data_type == "uint64_t": - return np.uint64 - elif data_type == "int8_t": - return np.int8 - elif data_type == "int16_t": - return np.int16 - elif data_type == "int32_t": - return np.int32 - elif data_type == "int64_t": - return np.int64 - elif data_type == "float": - return np.float32 - elif 
data_type == "double": - return np.float64 - elif data_type == "complex": - return np.complex64 - elif data_type == "complex": - return np.complex128 - else: - raise ValueError(f"unsupported data_type: {data_type}") - - -class PingTensorTxOp(Operator): - """Simple transmitter operator. - - This operator has a single output port: - output: "out" - - On each tick, it transmits a tensor on the "out" port. - The boolean gpu_tensor argument determines if it is a host or device tensor. - """ - - def __init__( - self, - fragment, - *args, - gpu_tensor=False, - shape=(32, 64), - dtype=np.uint8, - tensor_name="out", - **kwargs, - ): - self.index = 1 - self.gpu_tensor = gpu_tensor - self.shape = shape - self.dtype = dtype - self.tensor_name = tensor_name - # Need to call the base class constructor last - super().__init__(fragment, *args, **kwargs) - - def setup(self, spec: OperatorSpec): - spec.output("out") - - def compute(self, op_input, op_output, context): - xp = cp if self.gpu_tensor else np - op_output.emit(dict(out=xp.ones(self.shape, dtype=self.dtype)), self.tensor_name) - self.index += 1 - - -class PingTensorRxOp(Operator): - """Simple receiver operator. - - This operator has a single input port: - input: "in" - - This is an example of a native operator with one input port. - On each tick, it receives a tensor from the "in" port. 
- """ - - def __init__(self, fragment, *args, **kwargs): - # Need to call the base class constructor last - self.count = 1 - super().__init__(fragment, *args, **kwargs) - - def setup(self, spec: OperatorSpec): - spec.input("in") - - def compute(self, op_input, op_output, context): - tensormap = op_input.receive("in") - for key, value in tensormap.items(): - if hasattr(value, "__cuda_array_interface__"): - arr = cp.asarray(value) - else: - arr = np.asarray(value) - dtype_name = np.dtype(dtype).name - print( - f"message {self.count}: Tensor key: '{key}', shape: {arr.shape}, " - f"dtype: '{dtype_name}'" - ) - self.count += 1 - - -# Now define a simple application using the operators defined above +from holoscan.core import Application, Fragment +from holoscan.operators import PingTensorRxOp, PingTensorTxOp class Fragment1(Fragment): @@ -136,12 +32,16 @@ def __init__( gpu_tensor=False, count=10, shape=(32, 64), - dtype=np.uint8, + dtype="uint8_t", tensor_name="out", **kwargs, ): self.gpu_tensor = gpu_tensor self.count = count + if len(shape) == 2: + self.rows, self.columns = shape + else: + raise ValueError("expected shape of length 2") self.shape = shape self.dtype = dtype self.tensor_name = tensor_name @@ -150,11 +50,13 @@ def __init__( def compose(self): # Configure the operators. Here we use CountCondition to terminate # execution after a specific number of messages have been sent. 
+ storage_type = "device" if self.gpu_tensor else "system" tx = PingTensorTxOp( self, CountCondition(self, self.count), - gpu_tensor=self.gpu_tensor, - shape=self.shape, + storage_type=storage_type, + rows=self.rows, + columns=self.columns, dtype=self.dtype, tensor_name=self.tensor_name, name="tx", @@ -195,8 +97,6 @@ def compose(self): # We can skip the "out" and "in" suffixes, as they are the default self.add_flow(fragment1, fragment2, {("tx", "rx")}) - # self.resource(self.from_config("resources.fragments")) - def main(on_gpu=False, count=10, shape=(64, 32), dtype=np.uint8): app = MyPingApp(gpu_tensor=on_gpu, count=count, shape=shape, dtype=dtype) @@ -291,12 +191,9 @@ def main(on_gpu=False, count=10, shape=(64, 32), dtype=np.uint8): # only non-zero dimensions are included in the shape shape = tuple(s for s in (args.batch, args.rows, args.columns, args.channels) if s > 0) - # convert CLI string to numpy.dtype - dtype = data_type_to_numpy_dtype(args.data_type) - main( on_gpu=args.gpu, count=args.count, shape=shape, - dtype=dtype, + dtype=args.data_type, ) diff --git a/examples/ping_multi_port/python/CMakeLists.min.txt b/examples/ping_multi_port/python/CMakeLists.min.txt new file mode 100644 index 0000000..bf19004 --- /dev/null +++ b/examples/ping_multi_port/python/CMakeLists.min.txt @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_PYTHON_PING_MULTI_PORT_TEST + COMMAND python3 ping_multi_port.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_PING_MULTI_PORT_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Rx message value1: 57") +endif() diff --git a/examples/ping_multi_port/python/CMakeLists.txt b/examples/ping_multi_port/python/CMakeLists.txt index 72ef77a..927136a 100644 --- a/examples/ping_multi_port/python/CMakeLists.txt +++ b/examples/ping_multi_port/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,6 +30,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_PYTHON_PING_MULTI_PORT_TEST diff --git a/examples/ping_simple/python/CMakeLists.min.txt b/examples/ping_simple/python/CMakeLists.min.txt new file mode 100644 index 0000000..5b515f6 --- /dev/null +++ b/examples/ping_simple/python/CMakeLists.min.txt @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_PYTHON_PING_SIMPLE_TEST + COMMAND python3 ping_simple.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_PING_SIMPLE_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Rx message value: 10" + ) +endif() diff --git a/examples/ping_simple/python/CMakeLists.txt b/examples/ping_simple/python/CMakeLists.txt index 5f018ad..5ecbacf 100644 --- a/examples/ping_simple/python/CMakeLists.txt +++ b/examples/ping_simple/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,6 +30,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_PYTHON_PING_SIMPLE_TEST diff --git a/examples/ping_simple_run_async/python/CMakeLists.min.txt b/examples/ping_simple_run_async/python/CMakeLists.min.txt new file mode 100644 index 0000000..655444d --- /dev/null +++ b/examples/ping_simple_run_async/python/CMakeLists.min.txt @@ -0,0 +1,26 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_PYTHON_PING_SIMPLE_RUN_ASYNC_TEST + COMMAND python3 ping_simple_run_async.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_PING_SIMPLE_RUN_ASYNC_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Rx message value: 10" + PASS_REGULAR_EXPRESSION "# Application finished" + ) +endif() diff --git a/examples/ping_simple_run_async/python/CMakeLists.txt b/examples/ping_simple_run_async/python/CMakeLists.txt index 76a9295..1bc2430 100644 --- a/examples/ping_simple_run_async/python/CMakeLists.txt +++ b/examples/ping_simple_run_async/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,6 +30,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_PYTHON_PING_SIMPLE_RUN_ASYNC_TEST diff --git a/examples/ping_vector/python/CMakeLists.min.txt b/examples/ping_vector/python/CMakeLists.min.txt new file mode 100644 index 0000000..8a30a2d --- /dev/null +++ b/examples/ping_vector/python/CMakeLists.min.txt @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_PYTHON_PING_VECTOR_TEST + COMMAND python3 ping_vector.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_PYTHON_PING_VECTOR_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Rx message receiver value\\[1\\]\\[4\\]: 126") +endif() diff --git a/examples/ping_vector/python/CMakeLists.txt b/examples/ping_vector/python/CMakeLists.txt index 41b2540..2866337 100644 --- a/examples/ping_vector/python/CMakeLists.txt +++ b/examples/ping_vector/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,6 +29,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_PYTHON_PING_VECTOR_TEST diff --git a/examples/resources/clock/python/CMakeLists.min.txt b/examples/resources/clock/python/CMakeLists.min.txt new file mode 100644 index 0000000..4bce6f6 --- /dev/null +++ b/examples/resources/clock/python/CMakeLists.min.txt @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_RESOURCES_PYTHON_PING_CLOCK_TEST + COMMAND python3 ping_clock.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_RESOURCES_PYTHON_PING_CLOCK_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Rx message value: 3" + ) + set_tests_properties(EXAMPLE_RESOURCES_PYTHON_PING_CLOCK_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "final timestamp" + ) +endif() diff --git a/examples/resources/clock/python/CMakeLists.txt b/examples/resources/clock/python/CMakeLists.txt index fd76b23..9a73701 100644 --- a/examples/resources/clock/python/CMakeLists.txt +++ b/examples/resources/clock/python/CMakeLists.txt @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,6 +30,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_RESOURCES_PYTHON_PING_CLOCK_TEST diff --git a/examples/resources/native/python/CMakeLists.min.txt b/examples/resources/native/python/CMakeLists.min.txt new file mode 100644 index 0000000..14ea632 --- /dev/null +++ b/examples/resources/native/python/CMakeLists.min.txt @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the \"License\"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an \"AS IS\" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Testing +if(BUILD_TESTING) + add_test(NAME EXAMPLE_RESOURCES_PYTHON_NATIVE_RESOURCE_TEST + COMMAND python3 native_resource.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + set_tests_properties(EXAMPLE_RESOURCES_PYTHON_NATIVE_RESOURCE_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "native resource setup method called" + PASS_REGULAR_EXPRESSION "MinimalOp compute method called" + FAIL_REGULAR_EXPRESSION "error" + FAIL_REGULAR_EXPRESSION "Exception occurred" + ) +endif() diff --git a/examples/resources/native/python/CMakeLists.txt b/examples/resources/native/python/CMakeLists.txt index 210c295..ac1d7c5 100644 --- a/examples/resources/native/python/CMakeLists.txt +++ b/examples/resources/native/python/CMakeLists.txt @@ -30,6 +30,13 @@ install(FILES COMPONENT "holoscan-examples" ) +# Install the minimal CMakeLists.txt file +install(FILES CMakeLists.min.txt + RENAME "CMakeLists.txt" + DESTINATION "${app_relative_dest_path}" + COMPONENT holoscan-examples +) + # Testing if(HOLOSCAN_BUILD_TESTS) add_test(NAME EXAMPLE_RESOURCES_PYTHON_NATIVE_RESOURCE_TEST diff --git a/examples/v4l2_camera/README.md b/examples/v4l2_camera/README.md index 2468de5..32743ad 100644 --- a/examples/v4l2_camera/README.md +++ b/examples/v4l2_camera/README.md @@ -4,8 +4,8 @@ This app captures video streams using [Video4Linux](https://www.kernel.org/doc/h #### Notes on the V4L2 operator -* The V4L2 operator can read a range of pixel formats, though it will always output RGBA32 at this time. -* If the pixel format is not specified in the YAML configuration file, it will automatically select either `AB24`, `YUYV`, or `MJPG` if supported by the device. The first supported format in the order provided will be used. For other formats, you will need to specify the `pixel_format` parameter in the yaml file which will then be used but note that the operator expects that these formats can be encoded as RGBA32. If not, the behavior is undefined. 
+* The V4L2 operator can read a range of pixel formats; it will always output RGBA32 if `pass_through` is `false` (the default). +* If the pixel format is not specified in the YAML configuration file, it will automatically select either `AB24`, `YUYV`, `MJPG`, or `RGB3` if supported by the device. The first supported format in the order provided will be used. For other formats, you will need to specify the `pixel_format` parameter in the yaml file which will then be used but note that the operator expects that these formats can be encoded as RGBA32 if `pass_through` is `false` (the default). If the format can't be encoded as RGBA32, the behavior is undefined. * The V4L2 operator outputs data on host. In order to move data from host to GPU device, use `holoscan::ops::FormatConverterOp`. ## Requirements @@ -48,6 +48,7 @@ There are a few parameters that can be specified: * `pixel_format`: The [V4L2 pixel format](https://docs.kernel.org/userspace-api/media/v4l/pixfmt-intro.html) of the device, as FourCC code * Default: auto selects `AB24`, `YUYV`, or `MJPG` based on device support * List available options with `v4l2-ctl -d /dev/ --list-formats` +* `pass_through`: If set, pass the input buffer through to the output unmodified, else convert to RGBA32 (default `false`). * `width` and `height`: The frame dimensions * Default: device default * List available options with `v4l2-ctl -d /dev/ --list-formats-ext` @@ -131,3 +132,25 @@ ffmpeg -stream_loop -1 -re -i /path/to/video.mp4 -pix_fmt yuyv422 -f v4l2 /dev/v ``` Next, run the `v4l2_camera` application having specified the correct device node in the yaml-configuration file (set the `device` parameter of the V4L2 operator to `/dev/video3`). The mp4 video should be showing in the Holoviz window. + +## Use YUV pass through + +If the video device supports the `YUYV` format, the video frames can be displayed by the Holoviz operator without the need to be converted by the V4L capture operator.
First check if the video device supports the `YUYV` format: +```sh +v4l2-ctl -d /dev/video0 --list-formats +``` + +If this lists `'YUYV' (YUYV 4:2:2)` as a supported format, run the example with `--config v4l2_camera_yuv.yaml` as an argument. This will select a configuration file which sets up the V4L capture operator to output YUV and the Holoviz operator to expect YUV as input. + +Run the example and check the log, Holoviz will list the input specification which indicates that it is rendering YUV frames: + +``` +[info] [holoviz.cpp:1798] Input spec: +- type: color + name: "" + opacity: 1.000000 + priority: 0 + image_format: y8u8y8v8_422_unorm + yuv_model_conversion: yuv_601 + yuv_range: itu_full +``` \ No newline at end of file diff --git a/examples/v4l2_camera/cpp/CMakeLists.min.txt b/examples/v4l2_camera/cpp/CMakeLists.min.txt index 50cc80c..57231d8 100644 --- a/examples/v4l2_camera/cpp/CMakeLists.min.txt +++ b/examples/v4l2_camera/cpp/CMakeLists.min.txt @@ -34,9 +34,9 @@ target_link_libraries(v4l2_camera # Copy config file add_custom_target(v4l2_camera_yaml - COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera.yaml" ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS "v4l2_camera.yaml" - BYPRODUCTS "v4l2_camera.yaml" + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera.yaml" "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera_yuv.yaml" ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS "v4l2_camera.yaml" "v4l2_camera_yuv.yaml" + BYPRODUCTS "v4l2_camera.yaml" "v4l2_camera_yuv.yaml" ) add_dependencies(v4l2_camera v4l2_camera_yaml) @@ -44,14 +44,8 @@ add_dependencies(v4l2_camera v4l2_camera_yaml) # Testing option(HOLOSCAN_BUILD_V4L2_TESTS "Build tests for V4L2 loopback" OFF) if(HOLOSCAN_BUILD_TESTS AND HOLOSCAN_BUILD_V4L2_TESTS) - # Assumes that the v4l2 video loopback is mounted on /dev/video3. This allows us to create a - # a virtual video device and stream data from an mp4 file without the need for a physical - # video input device. 
To setup v4l2 video loopback, refer to the "Use with V4L2 Loopback Devices" - # section of the README file for this example - file(READ ${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera.yaml CONFIG_STRING) - string(REPLACE "device: \"/dev/video0\"" "device: \"/dev/video3\"" CONFIG_STRING "${CONFIG_STRING}") - set(CONFIG_FILE ${CMAKE_CURRENT_BINARY_DIR}/cpp_v4l2_camera_config.yaml) - file(WRITE ${CONFIG_FILE} "${CONFIG_STRING}") + # Assumes that the v4l2 video loopback has already been mounted and the yaml files have been + # updated to use the virtual loopback device. # Modify testcase to only run 10 frames add_custom_command(OUTPUT v4l2_camera_test.cpp @@ -75,12 +69,17 @@ if(HOLOSCAN_BUILD_TESTS AND HOLOSCAN_BUILD_V4L2_TESTS) add_dependencies(v4l2_camera_test racerx_data) add_test(NAME EXAMPLE_CPP_V4L2_CAMERA_TEST - COMMAND bash -c "ffmpeg -stream_loop -1 -re -i ${CMAKE_SOURCE_DIR}/data/racerx/racerx-small.mp4 \ - -pix_fmt yuyv422 -f v4l2 /dev/video3 & sleep 5; \ - ${CMAKE_CURRENT_BINARY_DIR}/v4l2_camera_test ${CONFIG_FILE}; echo 'Done'; kill %1" + COMMAND "${CMAKE_CURRENT_BINARY_DIR}/v4l2_camera_test" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) set_tests_properties(EXAMPLE_CPP_V4L2_CAMERA_TEST PROPERTIES PASS_REGULAR_EXPRESSION "Application has finished running") -endif() + add_test(NAME EXAMPLE_CPP_V4L2_YUV_CAMERA_TEST + COMMAND "${CMAKE_CURRENT_BINARY_DIR}/v4l2_camera_test" v4l2_camera_yuv.yaml + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_V4L2_YUV_CAMERA_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Application has finished running") + +endif() diff --git a/examples/v4l2_camera/cpp/CMakeLists.txt b/examples/v4l2_camera/cpp/CMakeLists.txt index b177f57..861d780 100644 --- a/examples/v4l2_camera/cpp/CMakeLists.txt +++ b/examples/v4l2_camera/cpp/CMakeLists.txt @@ -27,9 +27,9 @@ target_link_libraries(v4l2_camera # Copy config file add_custom_target(v4l2_camera_yaml - COMMAND ${CMAKE_COMMAND} -E copy 
"${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera.yaml" ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS "v4l2_camera.yaml" - BYPRODUCTS "v4l2_camera.yaml" + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera.yaml" "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera_yuv.yaml" ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS "v4l2_camera.yaml" "v4l2_camera_yuv.yaml" + BYPRODUCTS "v4l2_camera.yaml" "v4l2_camera_yuv.yaml" ) add_dependencies(v4l2_camera v4l2_camera_yaml) @@ -47,7 +47,7 @@ install(TARGETS v4l2_camera DESTINATION "${app_relative_dest_path}" COMPONENT holoscan-examples ) -install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera.yaml" +install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera.yaml" "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera_yuv.yaml" DESTINATION ${app_relative_dest_path} COMPONENT holoscan-examples ) @@ -99,5 +99,12 @@ if(HOLOSCAN_BUILD_TESTS AND HOLOSCAN_BUILD_V4L2_TESTS) ) set_tests_properties(EXAMPLE_CPP_V4L2_CAMERA_TEST PROPERTIES PASS_REGULAR_EXPRESSION "Application has finished running") -endif() + add_test(NAME EXAMPLE_CPP_V4L2_YUV_CAMERA_TEST + COMMAND "${CMAKE_CURRENT_BINARY_DIR}/v4l2_camera_test" v4l2_camera_yuv.yaml + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + set_tests_properties(EXAMPLE_CPP_V4L2_YUV_CAMERA_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Application has finished running") + +endif() diff --git a/examples/v4l2_camera/cpp/v4l2_camera.yaml b/examples/v4l2_camera/cpp/v4l2_camera.yaml index 9e2c260..507698e 100644 --- a/examples/v4l2_camera/cpp/v4l2_camera.yaml +++ b/examples/v4l2_camera/cpp/v4l2_camera.yaml @@ -27,6 +27,10 @@ source: # V4L2VideoCaptureOp # | App will auto-select the default "pixel_format" for your device if not provided. # | See this app's readme file for details. # pixel_format: "AB24" + # | App will convert to RGBA32 if "pass_through" is "false" (the default). If "pass_through" is + # | "true" the video buffer will be passed to the HolovizOp unmodified. + # | See this app's readme file for details. 
+ # pass_through: true # | These properties might not be supported for all v4l2 nodes. # | The app will attempt to do auto exposure and gain if not provided. If auto is not supported, @@ -36,3 +40,4 @@ source: # V4L2VideoCaptureOp # gain: 100 visualizer: # Holoviz + diff --git a/examples/v4l2_camera/cpp/v4l2_camera_yuv.yaml b/examples/v4l2_camera/cpp/v4l2_camera_yuv.yaml new file mode 100644 index 0000000..d05ddc3 --- /dev/null +++ b/examples/v4l2_camera/cpp/v4l2_camera_yuv.yaml @@ -0,0 +1,46 @@ +%YAML 1.2 +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +source: # V4L2VideoCaptureOp + # | Input device. On Clara AGX or NVIDIA IGX devkits, /dev/video0 is generally the HDMI IN + device: "/dev/video0" + + # | App will auto-select default width and height if not provided + # | For performance, you may want to specify these parameters yourself. + # | See this app's readme file for details. + # width: 1920 + # height: 1080 + + # | App will auto-select the default "pixel_format" for your device if not provided. + # | See this app's readme file for details. + pixel_format: "YUYV" + # | App will convert to RGBA32 if "pass_through" is "false" (the default). If "pass_through" is + # | "true" the video buffer will be passed to the HolovizOp unmodified. + # | See this app's readme file for details. 
+ pass_through: true + + # | These properties might not be supported for all v4l2 nodes. + # | The app will attempt to do auto exposure and gain if not provided. If auto is not supported, + # | it will use the defaults defined by your device. + # | See this app's readme file for details. + # exposure_time: 500 + # gain: 100 + +visualizer: # Holoviz + tensors: + - name: "" + type: "color" + image_format: "y8u8y8v8_422_unorm" diff --git a/examples/v4l2_camera/python/CMakeLists.txt b/examples/v4l2_camera/python/CMakeLists.txt index 1101649..de2b6ae 100644 --- a/examples/v4l2_camera/python/CMakeLists.txt +++ b/examples/v4l2_camera/python/CMakeLists.txt @@ -25,9 +25,9 @@ add_custom_target(python_v4l2_camera ALL # Copy config file add_custom_target(python_v4l2_camera_yaml - COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera.yaml" ${CMAKE_CURRENT_BINARY_DIR} - DEPENDS "v4l2_camera.yaml" - BYPRODUCTS "v4l2_camera.yaml" + COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera.yaml" "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera_yuv.yaml" ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS "v4l2_camera.yaml" "v4l2_camera_yuv.yaml" + BYPRODUCTS "v4l2_camera.yaml" "v4l2_camera_yuv.yaml" ) add_dependencies(python_v4l2_camera python_v4l2_camera_yaml) @@ -36,6 +36,7 @@ add_dependencies(python_v4l2_camera python_v4l2_camera_yaml) install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera.py" "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera.yaml" + "${CMAKE_CURRENT_SOURCE_DIR}/v4l2_camera_yuv.yaml" DESTINATION "${app_relative_dest_path}" COMPONENT "holoscan-examples" ) @@ -62,4 +63,14 @@ if(HOLOSCAN_BUILD_TESTS AND HOLOSCAN_BUILD_V4L2_TESTS) set_tests_properties(EXAMPLE_PYTHON_V4L2_CAMERA_TEST PROPERTIES PASS_REGULAR_EXPRESSION "Application has finished running" ) + + add_test(NAME EXAMPLE_PYTHON_V4L2_YUV_CAMERA_TEST + COMMAND python3 v4l2_camera_test.py v4l2_camera_yuv.yaml + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + + 
set_tests_properties(EXAMPLE_PYTHON_V4L2_YUV_CAMERA_TEST PROPERTIES + PASS_REGULAR_EXPRESSION "Application has finished running" + ) + endif() diff --git a/examples/v4l2_camera/python/v4l2_camera_yuv.yaml b/examples/v4l2_camera/python/v4l2_camera_yuv.yaml new file mode 100644 index 0000000..d05ddc3 --- /dev/null +++ b/examples/v4l2_camera/python/v4l2_camera_yuv.yaml @@ -0,0 +1,46 @@ +%YAML 1.2 +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +--- +source: # V4L2VideoCaptureOp + # | Input device. On Clara AGX or NVIDIA IGX devkits, /dev/video0 is generally the HDMI IN + device: "/dev/video0" + + # | App will auto-select default width and height if not provided + # | For performance, you may want to specify these parameters yourself. + # | See this app's readme file for details. + # width: 1920 + # height: 1080 + + # | App will auto-select the default "pixel_format" for your device if not provided. + # | See this app's readme file for details. + pixel_format: "YUYV" + # | App will convert to RGBA32 if "pass_through" is "false" (the default). If "pass_through" is + # | "true" the video buffer will be passed to the HolovizOp unmodified. + # | See this app's readme file for details. + pass_through: true + + # | These properties might not be supported for all v4l2 nodes. 
+ # | The app will attempt to do auto exposure and gain if not provided. If auto is not supported, + # | it will use the defaults defined by your device. + # | See this app's readme file for details. + # exposure_time: 500 + # gain: 100 + +visualizer: # Holoviz + tensors: + - name: "" + type: "color" + image_format: "y8u8y8v8_422_unorm" diff --git a/examples/video_replayer/README.md b/examples/video_replayer/README.md index e933baa..8bb4328 100644 --- a/examples/video_replayer/README.md +++ b/examples/video_replayer/README.md @@ -8,6 +8,15 @@ The video frames need to have been converted to a gxf entity format to use as in *Visit the [SDK User Guide](https://docs.nvidia.com/holoscan/sdk-user-guide/examples/video_replayer.html) for step-by-step documentation of this example.* +#### Note on error logged by the application +Note that it is currently expected that this application logs the following error during shutdown + +```text +[error] [ucx_context.cpp:466] Connection dropped with status -25 (Connection reset by remote peer) +``` + +This will be logged by the worker that is running "fragment2" after "fragment1" has sent all messages. It is caused by fragment 1 starting to shutdown after its last message has been sent, resulting in severing of connections from fragment 2 receivers to fragment 1 transmitters. + ## Data The following dataset is used by this example: diff --git a/gxf_extensions/README.md b/gxf_extensions/README.md index 9056597..85aea62 100644 --- a/gxf_extensions/README.md +++ b/gxf_extensions/README.md @@ -2,6 +2,5 @@ See the User Guide for details regarding the extensions in GXF and Holoscan SDK, and for instructions to build your own extensions -- `bayer_demosaic`: includes the `nvidia::holoscan::BayerDemosaic` codelet. It performs color filter array (CFA) interpolation for 1-channel inputs of 8 or 16-bit unsigned integer and outputs an RGB or RGBA image. 
This codelet is no longer used in the core SDK as there is now also a native `holoscan::Operator` version available (instead of wrapping this codelet as a `holoscan::gxf::GXFOperator`). This version is kept as a concrete example of a codelet and a `GXFOperator` wrapping this codelet can still be found in `tests/system/bayer_demosaic_gxf.hpp` where it is used for test cases. - `gxf_holoscan_wrapper`: includes the `holoscan::gxf::OperatorWrapper` codelet. It is used as a utility base class to wrap a holoscan operator to interface with the GXF framework. -- `ucx_holoscan`: includes `nvidia::holoscan::UcxHoloscanComponentSerializer` which is a `nvidia::gxf::ComponentSerializer` that handles serialization and deserialization of `holoscan::Message` and `holoscan::Tensor` types over a Unified Communication X (UCX) network connection. UCX is used by Holoscan SDK to send data between fragments of distributed applications. This extension must be used in combination with standard GXF UCX extension components. Specifically, this `UcxHoloscanComponentSerializer` is intended for use by the `UcxEntitySerializer` where it can operate alongside the `UcxComponentSerializer` that serializes GXF-specific types (`nvidia::gxf::Tensor`, `nvidia::gxf::VideoBuffer`, etc.). +- `ucx`: includes `nvidia::holoscan::UcxHoloscanComponentSerializer` which is a `nvidia::gxf::ComponentSerializer` that handles serialization and deserialization of `holoscan::Message` and `holoscan::Tensor` types over a Unified Communication X (UCX) network connection. UCX is used by Holoscan SDK to send data between fragments of distributed applications. This extension must be used in combination with standard GXF UCX extension components. Specifically, this `UcxHoloscanComponentSerializer` is intended for use by the `UcxEntitySerializer` where it can operate alongside the `UcxComponentSerializer` that serializes GXF-specific types (`nvidia::gxf::Tensor`, `nvidia::gxf::VideoBuffer`, etc.). 
diff --git a/include/holoscan/core/executors/gxf/gxf_executor.hpp b/include/holoscan/core/executors/gxf/gxf_executor.hpp index 934cc25..ecbca96 100644 --- a/include/holoscan/core/executors/gxf/gxf_executor.hpp +++ b/include/holoscan/core/executors/gxf/gxf_executor.hpp @@ -220,7 +220,7 @@ class GXFExecutor : public holoscan::Executor { ///< initializing a new operator if this is 0. gxf_uid_t op_cid_ = 0; ///< The GXF component ID of the operator. Create new component for ///< initializing a new operator if this is 0. - nvidia::gxf::Extension* gxf_holoscan_extension_ = nullptr; ///< The GXF holoscan extension. + nvidia::gxf::Extension* gxf_holoscan_extension_ = nullptr; ///< The GXF holoscan extension. /// The flag to indicate whether the GXF graph is initialized. bool is_gxf_graph_initialized_ = false; diff --git a/include/holoscan/core/executors/gxf/gxf_parameter_adaptor.hpp b/include/holoscan/core/executors/gxf/gxf_parameter_adaptor.hpp index 5259946..b1d0ff9 100644 --- a/include/holoscan/core/executors/gxf/gxf_parameter_adaptor.hpp +++ b/include/holoscan/core/executors/gxf/gxf_parameter_adaptor.hpp @@ -455,7 +455,7 @@ class GXFParameterAdaptor { gxf_uid_t resource_cid = gxf_resource->gxf_cid(); std::string full_resource_name = gxf::get_full_component_name(context, resource_cid); - yaml_node.push_back(full_resource_name.c_str()); + yaml_node.push_back(full_resource_name); } else { HOLOSCAN_LOG_TRACE( "Resource item in the vector is null. Skipping it for key '{}'", key); @@ -487,7 +487,7 @@ class GXFParameterAdaptor { gxf_uid_t condition_cid = gxf_condition->gxf_cid(); std::string full_condition_name = gxf::get_full_component_name(context, condition_cid); - yaml_node.push_back(full_condition_name.c_str()); + yaml_node.push_back(full_condition_name); } else { HOLOSCAN_LOG_TRACE( "Condition item in the vector is null. 
Skipping it for key '{}'", key); diff --git a/include/holoscan/core/gxf/gxf_utils.hpp b/include/holoscan/core/gxf/gxf_utils.hpp index 5076f04..977dc66 100644 --- a/include/holoscan/core/gxf/gxf_utils.hpp +++ b/include/holoscan/core/gxf/gxf_utils.hpp @@ -257,6 +257,13 @@ bool has_component(gxf_context_t context, gxf_uid_t eid, gxf_tid_t tid = GxfTidN /// Create a GXF entity group with the specified name gxf_uid_t add_entity_group(void* context, std::string name); +/** + * @brief Get the default queue policy from environment variable HOLOSCAN_QUEUE_POLICY. + * + * @return The default queue policy. + */ +uint64_t get_default_queue_policy(); + } // namespace holoscan::gxf #endif /* HOLOSCAN_CORE_GXF_GXF_UTILS_HPP */ diff --git a/include/holoscan/core/io_context.hpp b/include/holoscan/core/io_context.hpp index 7ebac1b..f89d301 100644 --- a/include/holoscan/core/io_context.hpp +++ b/include/holoscan/core/io_context.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -43,9 +44,13 @@ namespace holoscan { struct NoMessageType {}; constexpr NoMessageType kNoReceivedMessage; -// To indicate that type casting is not possible -struct NoCastableType {}; -constexpr NoCastableType kNoTypeCastableMessage; +// To indicate that input port is not accessible +struct NoAccessibleMessageType : public std::string { + NoAccessibleMessageType() : std::string("Port is not accessible") {} + explicit NoAccessibleMessageType(const std::string& message) : std::string(message) {} + explicit NoAccessibleMessageType(const char* message) : std::string(message) {} + explicit NoAccessibleMessageType(std::string&& message) : std::string(std::move(message)) {} +}; static inline std::string get_well_formed_name( const char* name, const std::unordered_map>& io_list) { @@ -162,6 +167,9 @@ class InputContext { * which will contain the error message. The error message can be access by calling the `what()` * method of the holoscan::unexpected object. 
* + * It throws an invalid argument exception if the operator attempts to receive non-vector data + * (`op_input.receive()`) from an input port with a queue size of `IOSpec::kAnySize`. + * * Example: * * ```cpp @@ -190,248 +198,32 @@ class InputContext { */ template holoscan::expected receive(const char* name = nullptr) { + auto& params = op_->spec()->params(); + auto param_it = params.find(std::string(name)); + if constexpr (holoscan::is_vector_v) { - // It could either be a parameter which is trying to receive from a vector - // or a vector of values from the inputs - // First check, if it is trying to receive from a parameter - - auto& params = op_->spec()->params(); - auto it = params.find(std::string(name)); - - if (it == params.end()) { - // the name is not a parameter, so it must be an input - auto& inputs = op_->spec()->inputs(); - auto input_it = inputs.find(std::string(name)); - if (input_it == inputs.end()) { - auto error_message = - fmt::format("Unable to find input parameter or input port with name '{}'", name); - // Keep the debugging info on for development purposes - HOLOSCAN_LOG_DEBUG(error_message); - return make_unexpected( - holoscan::RuntimeError(holoscan::ErrorCode::kReceiveError, error_message.c_str())); - } + DataT input_vector; + std::string error_message; - auto value = receive_impl(name); - if (value.type() == typeid(nullptr_t)) { - auto error_message = - fmt::format("No data is received from the input port with name '{}'", name); - HOLOSCAN_LOG_DEBUG(error_message); + if (param_it != params.end()) { + auto& param_wrapper = param_it->second; + if (!is_valid_param_type(param_wrapper.arg_type())) { return make_unexpected( - holoscan::RuntimeError(holoscan::ErrorCode::kReceiveError, error_message.c_str())); + create_receive_error(name, "Input parameter is not of type 'std::vector'")); } - try { - // Check if the user is trying to receive a vector of the original type - if constexpr (std::is_same_v) { - std::vector value_vector; - // Push the 
first value - value_vector.push_back(std::move(value)); - value = receive_impl(name); - // Keep pushing the values until we reach a nullptr - while (value.type() != typeid(NoMessageType)) { - value_vector.push_back(std::move(value)); - value = receive_impl(name); - } - return value_vector; - } else if (value.type() == typeid(typename holoscan::element_of_t)) { - DataT value_vector; - // Push the first value - value_vector.push_back( - std::any_cast>(std::move(value))); - value = receive_impl(name); - // Keep pushing the values until we reach a nullptr - while (value.type() != typeid(NoMessageType)) { - value_vector.push_back( - std::any_cast>(std::move(value))); - value = receive_impl(name); - } - return value_vector; - } else { - DataT result = std::any_cast(value); - return result; - } - } catch (const std::bad_any_cast& e) { - auto error_message = fmt::format( - "Unable to cast the received data to the specified type ({}) for input '{}' of type " - "{}: {}", - nvidia::TypenameAsString(), - name, - value.type().name(), - e.what()); - HOLOSCAN_LOG_DEBUG(error_message); + if (!fill_input_vector_from_params(param_wrapper, name, input_vector, error_message)) { return make_unexpected( - holoscan::RuntimeError(holoscan::ErrorCode::kReceiveError, error_message.c_str())); + create_receive_error(name, error_message.c_str())); } - } - - auto& param_wrapper = it->second; - auto& arg_type = param_wrapper.arg_type(); - if ((arg_type.element_type() != ArgElementType::kIOSpec) || - (arg_type.container_type() != ArgContainerType::kVector)) { - auto error_message = fmt::format( - "Input parameter with name '{}' is not of type 'std::vector'", name); - HOLOSCAN_LOG_ERROR(error_message); - return make_unexpected( - holoscan::RuntimeError(holoscan::ErrorCode::kReceiveError, error_message.c_str())); - } - std::any& any_param = param_wrapper.value(); - // Note that the type of any_param is Parameter*, not Parameter. 
- auto& param = *std::any_cast>*>(any_param); - - std::vector input_vector; - int num_inputs = param.get().size(); - input_vector.reserve(num_inputs); - - for (int index = 0; index < num_inputs; ++index) { - // Check if the input name points to the parameter name of the operator, - // and the parameter type is 'std::vector'. - // In other words, find if there is a receiver with a specific label - // (':'. e.g, 'receivers:0') to return an object with - // 'std::vector' type. - auto value = receive_impl(fmt::format("{}:{}", name, index).c_str(), true); - - try { - // If the received data is nullptr, any_cast will try to cast to appropriate pointer - // type. Otherwise it will register an error. - if constexpr (std::is_same_v) { - input_vector.push_back(std::move(value)); - } else { - auto casted_value = std::move(std::any_cast(value)); - input_vector.push_back(std::move(casted_value)); - } - } catch (const std::bad_any_cast& e) { - auto error_message = fmt::format( - "Unable to cast the received data to the specified type ({}) for input '{}:{}' of " - "type {}: {}", - nvidia::TypenameAsString(), - name, - index, - value.type().name(), - e.what()); - try { - // An empty holoscan::gxf::Entity will be added to the vector. - typename DataT::value_type placeholder; - input_vector.push_back(std::move(placeholder)); - error_message = - fmt::format("{}\tA placeholder value is added to the vector for input '{}:{}'.", - error_message, - name, - index); - HOLOSCAN_LOG_WARN(error_message); - } catch (std::exception& e) { - error_message = fmt::format( - "{}\tUnable to add a placeholder value to the vector for input '{}:{}' :{}. 
" - "Skipping adding a value to the vector.", - error_message, - name, - index, - e.what()); - HOLOSCAN_LOG_ERROR(error_message); - continue; - } - } - } - return std::any_cast(input_vector); - } else { - // If it is not a vector then try to get the input directly and convert for respective data - // type for an input - auto value = receive_impl(name); - const std::type_info& value_type = value.type(); - bool is_bad_any_cast = false; - std::string exception_message; - - // If no message is received, return an error message - if (value_type == typeid(NoMessageType)) { - HOLOSCAN_LOG_DEBUG("No message is received from the input port with name '{}'", name); - auto error_message = - fmt::format("No message is received from the input port with name '{}'", name); - return make_unexpected( - holoscan::RuntimeError(holoscan::ErrorCode::kReceiveError, error_message.c_str())); - } else if (value_type == typeid(NoCastableType)) { - is_bad_any_cast = true; - } - if (!is_bad_any_cast) { - try { - // Check if the types of value and DataT are the same or not - if constexpr (std::is_same_v) { return value; } - DataT return_value = std::any_cast(value); - return return_value; - } catch (const std::bad_any_cast& e) { - exception_message = e.what(); - is_bad_any_cast = true; - } - } - - // Case when is_bad_any_cast is true - - // If the received data is nullptr, check whether the sent value was nullptr - if (value_type == typeid(nullptr_t)) { - HOLOSCAN_LOG_DEBUG("nullptr is received from the input port with name '{}'", name); - // If it is a shared pointer or raw pointer, then return nullptr - if constexpr (holoscan::is_shared_ptr_v) { - return nullptr; - } else if constexpr (std::is_pointer_v) { - return nullptr; - } - } - - // If it is of the type of holoscan::gxf::Entity then show a specific error message - if constexpr (is_one_of_derived_v) { - auto error_message = fmt::format( - "Unable to cast the received data to the specified type (holoscan::gxf::" - "Entity) for input {}: 
{}", - name, - exception_message); - HOLOSCAN_LOG_DEBUG(error_message); - return make_unexpected( - holoscan::RuntimeError(holoscan::ErrorCode::kReceiveError, error_message.c_str())); - } else if constexpr (is_one_of_derived_v) { - TensorMap tensor_map; - try { - auto gxf_entity = std::any_cast(value); - - auto components_expected = gxf_entity.findAll(); - auto components = components_expected.value(); - for (size_t i = 0; i < components.size(); i++) { - const auto component = components[i]; - const auto component_name = component->name(); - if (std::string(component_name).compare("metadata_") == 0) { - // Skip checking for Tensor as it's a MetadataDictionary object - continue; - } - if (std::string(component_name).compare("message_label") == 0) { - // Skip checking for Tensor as it's message label for DFFT - continue; - } - if (std::string(component_name).compare("cuda_stream_id_") == 0) { - // Skip checking for Tensor as it's a stream ID from CudaStreamHandler - continue; - } - std::shared_ptr holoscan_tensor = - gxf_entity.get(component_name); - if (holoscan_tensor) { tensor_map.insert({component_name, holoscan_tensor}); } - } - } catch (const std::bad_any_cast& e) { - auto error_message = fmt::format( - "Unable to cast the received data to the specified type (holoscan::TensorMap) for " - "input {}: {}", - name, - e.what()); - HOLOSCAN_LOG_DEBUG(error_message); + } else { + if (!fill_input_vector_from_inputs(name, input_vector, error_message)) { return make_unexpected( - holoscan::RuntimeError(holoscan::ErrorCode::kReceiveError, error_message.c_str())); + create_receive_error(name, error_message.c_str())); } - return tensor_map; } - auto error_message = fmt::format( - "Unable to cast the received data to the specified type ({}) for input {} of type {}: " - "{}", - nvidia::TypenameAsString(), - name, - value_type.name(), - exception_message); - HOLOSCAN_LOG_DEBUG(error_message); - return make_unexpected( - 
holoscan::RuntimeError(holoscan::ErrorCode::kReceiveError, error_message.c_str())); + return input_vector; + } else { + return receive_single_value(name); } } @@ -463,6 +255,272 @@ class InputContext { return nullptr; } + // --------------- Start of helper functions for the receive method --------------- + inline bool is_valid_param_type(const ArgType& arg_type) { + return (arg_type.element_type() == ArgElementType::kIOSpec) && + (arg_type.container_type() == ArgContainerType::kVector); + } + + template + inline bool fill_input_vector_from_params(ParameterWrapper& param_wrapper, const char* name, + DataT& input_vector, std::string& error_message) { + auto& param = *std::any_cast>*>(param_wrapper.value()); + int num_inputs = param.get().size(); + input_vector.reserve(num_inputs); + + for (int index = 0; index < num_inputs; ++index) { + std::string port_name = fmt::format("{}:{}", name, index); + auto value = receive_impl(port_name.c_str(), true); + const std::type_info& value_type = value.type(); + + if (value_type == typeid(kNoReceivedMessage)) { + error_message = + fmt::format("No data is received from the input port with name '{}'", port_name); + return false; + } + + if (!process_received_value(value, value_type, name, index, input_vector, error_message)) { + return false; + } + } + return true; + } + + template + inline bool fill_input_vector_from_inputs(const char* name, DataT& input_vector, + std::string& error_message) { + const auto& inputs = op_->spec()->inputs(); + const auto input_it = inputs.find(std::string(name)); + + if (input_it == inputs.end()) { return false; } + + int index = 0; + while (true) { + auto value = receive_impl(name); + const std::type_info& value_type = value.type(); + + if (value_type == typeid(kNoReceivedMessage)) { + if (index == 0) { + error_message = + fmt::format("No data is received from the input port with name '{}'", name); + return false; + } + break; + } + if (index == 0 && value_type == typeid(DataT)) { + // If the first 
input is of type DataT (such as `std::vector`), then return the value + // directly + input_vector = std::move(std::any_cast(value)); + return true; + } + + if (!process_received_value(value, value_type, name, index++, input_vector, error_message)) { + return false; + } + } + return true; + } + + inline bool populate_tensor_map(const holoscan::gxf::Entity& gxf_entity, + holoscan::TensorMap& tensor_map) { + auto components_expected = gxf_entity.findAll(); + auto components = components_expected.value(); + + for (const auto& component : components) { + const auto component_name = component->name(); + + // Skip non-tensor components based on specific names + std::string_view component_name_view(component_name); + if (component_name_view == "metadata_" || component_name_view == "message_label" || + component_name_view == "cuda_stream_id_") { + continue; + } + + // Attempt to get the Tensor component + std::shared_ptr holoscan_tensor = + gxf_entity.get(component_name); + + if (holoscan_tensor) { + tensor_map.insert({component_name, holoscan_tensor}); + } else { + HOLOSCAN_LOG_DEBUG("Unable to get tensor component '{}'", component_name); + return false; + } + } + return true; + } + + template + inline bool process_received_value(std::any& value, const std::type_info& value_type, + const char* name, int index, DataT& input_vector, + std::string& error_message) { + bool is_bad_any_cast = false; + + // Assume that the received data is not of type NoMessageType + // (this case should be handled by the caller) + + if (value_type == typeid(NoAccessibleMessageType)) { + auto casted_value = std::any_cast(value); + HOLOSCAN_LOG_ERROR(static_cast(casted_value)); + error_message = std::move(static_cast(casted_value)); + return false; + } + + if constexpr (std::is_same_v) { + input_vector.push_back(std::move(value)); + } else if (value_type == typeid(nullptr_t)) { + handle_null_value(input_vector); + } else { + try { + auto casted_value = std::any_cast(value); + 
input_vector.push_back(casted_value); + } catch (const std::bad_any_cast& e) { + is_bad_any_cast = true; + } catch (const std::exception& e) { + error_message = fmt::format( + "Unable to cast the received data to the specified type for input '{}:{}' of " + "type {}: {}", + name, + index, + value_type.name(), + e.what()); + return false; + } + } + + if (is_bad_any_cast) { + return handle_bad_any_cast(value, name, index, input_vector, error_message); + } + + return true; + } + + template + inline void handle_null_value(DataT& input_vector) { + if constexpr (holoscan::is_shared_ptr_v || + std::is_pointer_v) { + input_vector.push_back(typename DataT::value_type{nullptr}); + } + } + + template + inline bool handle_bad_any_cast(std::any& value, const char* name, int index, DataT& input_vector, + std::string& error_message) { + if constexpr (is_one_of_derived_v) { + error_message = fmt::format( + "Unable to cast the received data to the specified type (holoscan::gxf::Entity) for " + "input " + "'{}:{}'", + name, + index); + HOLOSCAN_LOG_DEBUG(error_message); + return false; + } else if constexpr (is_one_of_derived_v) { + TensorMap tensor_map; + try { + auto gxf_entity = std::any_cast(value); + bool is_tensor_map_populated = populate_tensor_map(gxf_entity, tensor_map); + if (!is_tensor_map_populated) { + auto error_message = fmt::format( + "Unable to populate the TensorMap from the received GXF Entity for input '{}:{}'", + name, + index); + HOLOSCAN_LOG_DEBUG(error_message); + return false; + } + } catch (const std::bad_any_cast& e) { + error_message = fmt::format( + "Unable to cast the received data to the specified type (holoscan::TensorMap) for " + "input " + "'{}:{}'", + name, + index); + HOLOSCAN_LOG_DEBUG(error_message); + return false; + } + input_vector.push_back(std::move(tensor_map)); + } else { + error_message = fmt::format( + "Unable to cast the received data to the specified type for input '{}:{}' of type {}: {}", + name, + index, + value.type().name(), + 
error_message); + HOLOSCAN_LOG_DEBUG(error_message); + return false; + } + return true; + } + + template + inline holoscan::expected receive_single_value(const char* name) { + auto value = receive_impl(name); + const std::type_info& value_type = value.type(); + + if (value_type == typeid(NoMessageType)) { + return make_unexpected( + create_receive_error(name, "No message received from the input port")); + } else if (value_type == typeid(NoAccessibleMessageType)) { + auto casted_value = std::any_cast(value); + HOLOSCAN_LOG_ERROR(static_cast(casted_value)); + auto error_message = std::move(static_cast(casted_value)); + return make_unexpected( + create_receive_error(name, error_message.c_str())); + } + + try { + if constexpr (std::is_same_v) { + return value; + } else if (value_type == typeid(nullptr_t)) { + return handle_null_value(); + } else if constexpr (is_one_of_derived_v) { + // Handle nvidia::gxf::Entity + return std::any_cast(value); + } else if constexpr (is_one_of_derived_v) { + // Handle holoscan::TensorMap + TensorMap tensor_map; + bool is_tensor_map_populated = + populate_tensor_map(std::any_cast(value), tensor_map); + if (!is_tensor_map_populated) { + auto error_message = fmt::format( + "Unable to populate the TensorMap from the received GXF Entity for input '{}'", name); + HOLOSCAN_LOG_DEBUG(error_message); + return make_unexpected( + create_receive_error(name, error_message.c_str())); + } + return tensor_map; + } else { + return std::any_cast(value); + } + } catch (const std::bad_any_cast& e) { + auto error_message = fmt::format( + "Unable to cast the received data to the specified type for input '{}' of type {}", + name, + value.type().name()); + HOLOSCAN_LOG_DEBUG(error_message); + + return make_unexpected( + create_receive_error(name, error_message.c_str())); + } + } + + inline holoscan::RuntimeError create_receive_error(const char* name, const char* message) { + auto error_message = fmt::format("ReceiveError on input port '{}': {}", name, 
message); + HOLOSCAN_LOG_DEBUG(error_message); + return holoscan::RuntimeError(holoscan::ErrorCode::kReceiveError, error_message.c_str()); + } + + template + inline holoscan::expected handle_null_value() { + if constexpr (holoscan::is_shared_ptr_v || std::is_pointer_v) { + return DataT{nullptr}; + } else { + auto error_message = "Received nullptr for a non-pointer type"; + return make_unexpected(create_receive_error("input", error_message)); + } + } + // --------------- End of helper functions for the receive method --------------- + ExecutionContext* execution_context_ = nullptr; ///< The execution context that is associated with. Operator* op_ = nullptr; ///< The operator that this context is associated with. diff --git a/include/holoscan/core/metadata.hpp b/include/holoscan/core/metadata.hpp index f31787e..f220962 100644 --- a/include/holoscan/core/metadata.hpp +++ b/include/holoscan/core/metadata.hpp @@ -59,7 +59,7 @@ class MetadataDictionary { using ConstIterator = MapType::const_iterator; // Constructors - explicit MetadataDictionary(const MetadataPolicy& policy = MetadataPolicy::kUpdate) + explicit MetadataDictionary(const MetadataPolicy& policy = MetadataPolicy::kRaise) : dictionary_(std::make_shared()), policy_(policy) {} MetadataDictionary(const MetadataDictionary&) = default; MetadataDictionary(MetadataDictionary&&) = default; @@ -248,7 +248,8 @@ class MetadataDictionary { /// merge (move) the contents of other dictionary into this dictionary void merge(MetadataDictionary& other); - /// Insert items the other dictionary into this dictionary. Pre-existing values are not updated. + /// Insert items the other dictionary into this dictionary. Pre-existing values are not + /// updated. 
void insert(MetadataDictionary& other); /** diff --git a/include/holoscan/core/operator.hpp b/include/holoscan/core/operator.hpp index 3072261..bf70262 100644 --- a/include/holoscan/core/operator.hpp +++ b/include/holoscan/core/operator.hpp @@ -584,9 +584,9 @@ class Operator : public ComponentBase { std::shared_ptr metadata() { return dynamic_metadata_; } /** - * @brief Get the metadata update policy used by this operator. + * @brief Determine if metadata is enabled for the fragment this operator belongs to. * - * @returns The metadata update policy used by this operator. + * @returns Boolean indicating if metadata is enabled. */ bool is_metadata_enabled() { return is_metadata_enabled_; } @@ -778,6 +778,8 @@ class Operator : public ComponentBase { std::shared_ptr graph_entity_; ///< GXF graph entity corresponding to ///< the Operator + bool is_initialized_ = false; ///< Whether the operator is initialized. + private: /// Set the operator codelet or any other backend codebase. void set_op_backend(); diff --git a/include/holoscan/operators/aja_source/aja_source.hpp b/include/holoscan/operators/aja_source/aja_source.hpp index 25ae496..4242311 100644 --- a/include/holoscan/operators/aja_source/aja_source.hpp +++ b/include/holoscan/operators/aja_source/aja_source.hpp @@ -64,6 +64,7 @@ namespace holoscan::ops { * - **width**: Width of the video stream. Optional (default: `1920`). * - **height**: Height of the video stream. Optional (default: `1080`). * - **framerate**: Frame rate of the video stream. Optional (default: `60`). + * - **interlaced**: Whether the frame is interlaced (true) or progressive (false). Optional (default: `false`). * - **rdma**: Boolean indicating whether RDMA is enabled. Optional (default: `false`). * - **enable_overlay**: Boolean indicating whether a separate overlay channel is enabled. Optional * (default: `false`). 
@@ -103,6 +104,7 @@ class AJASourceOp : public holoscan::Operator { Parameter width_; Parameter height_; Parameter framerate_; + Parameter interlaced_; Parameter use_rdma_; Parameter enable_overlay_; Parameter overlay_channel_; @@ -113,7 +115,7 @@ class AJASourceOp : public holoscan::Operator { // internal state CNTV2Card device_; NTV2DeviceID device_id_ = DEVICE_ID_NOTFOUND; - NTV2VideoFormat video_format_ = NTV2_FORMAT_1080p_6000_A; + NTV2VideoFormat video_format_ = NTV2_FORMAT_UNKNOWN; NTV2PixelFormat pixel_format_ = NTV2_FBF_ABGR; bool use_tsi_ = false; bool is_kona_hdmi_ = false; diff --git a/include/holoscan/operators/holoviz/buffer_info.hpp b/include/holoscan/operators/holoviz/buffer_info.hpp index f03ac6a..f9bf89d 100644 --- a/include/holoscan/operators/holoviz/buffer_info.hpp +++ b/include/holoscan/operators/holoviz/buffer_info.hpp @@ -19,6 +19,7 @@ #define INCLUDE_HOLOSCAN_OPERATORS_HOLOVIZ_BUFFER_INFO_HPP #include +#include #include #include "holoscan/operators/holoviz/holoviz.hpp" @@ -60,18 +61,24 @@ struct BufferInfo { uint32_t width = 0; uint32_t height = 0; nvidia::gxf::PrimitiveType element_type = nvidia::gxf::PrimitiveType::kCustom; - bool image_format_supported = false; HolovizOp::ImageFormat image_format = HolovizOp::ImageFormat::AUTO_DETECT; viz::ComponentSwizzle component_swizzle[4] = {viz::ComponentSwizzle::IDENTITY, - viz::ComponentSwizzle::IDENTITY, - viz::ComponentSwizzle::IDENTITY, - viz::ComponentSwizzle::IDENTITY}; + viz::ComponentSwizzle::IDENTITY, + viz::ComponentSwizzle::IDENTITY, + viz::ComponentSwizzle::IDENTITY}; std::string name; /// points to the memory owned by either a tensor or video buffer const nvidia::byte* buffer_ptr = nullptr; nvidia::gxf::MemoryStorageType storage_type = nvidia::gxf::MemoryStorageType::kHost; uint64_t bytes_size = 0; nvidia::gxf::Tensor::stride_array_t stride{}; + + /// plane information + std::vector color_planes; + + HolovizOp::YuvModelConversion yuv_model_conversion = + 
HolovizOp::YuvModelConversion::YUV_601; + HolovizOp::YuvRange yuv_range = HolovizOp::YuvRange::ITU_FULL; }; } // namespace holoscan::ops diff --git a/include/holoscan/operators/holoviz/codecs.hpp b/include/holoscan/operators/holoviz/codecs.hpp index 808f940..f7e3cb0 100644 --- a/include/holoscan/operators/holoviz/codecs.hpp +++ b/include/holoscan/operators/holoviz/codecs.hpp @@ -173,6 +173,25 @@ struct codec { if (!maybe_size) { forward_error(maybe_size); } total_size += maybe_size.value(); + maybe_size = serialize_trivial_type( + spec.yuv_model_conversion_, endpoint); + if (!maybe_size) { forward_error(maybe_size); } + total_size += maybe_size.value(); + + maybe_size = serialize_trivial_type(spec.yuv_range_, endpoint); + if (!maybe_size) { forward_error(maybe_size); } + total_size += maybe_size.value(); + + maybe_size = + serialize_trivial_type(spec.x_chroma_location_, endpoint); + if (!maybe_size) { forward_error(maybe_size); } + total_size += maybe_size.value(); + + maybe_size = + serialize_trivial_type(spec.y_chroma_location_, endpoint); + if (!maybe_size) { forward_error(maybe_size); } + total_size += maybe_size.value(); + maybe_size = codec>::serialize(spec.color_, endpoint); if (!maybe_size) { forward_error(maybe_size); } total_size += maybe_size.value(); @@ -224,6 +243,23 @@ struct codec { if (!image_format) { forward_error(image_format); } out.image_format_ = image_format.value(); + auto yuv_model_conversion = + deserialize_trivial_type(endpoint); + if (!yuv_model_conversion) { forward_error(yuv_model_conversion); } + out.yuv_model_conversion_ = yuv_model_conversion.value(); + + auto yuv_range = deserialize_trivial_type(endpoint); + if (!yuv_range) { forward_error(yuv_range); } + out.yuv_range_ = yuv_range.value(); + + auto x_chroma_location = deserialize_trivial_type(endpoint); + if (!x_chroma_location) { forward_error(x_chroma_location); } + out.x_chroma_location_ = x_chroma_location.value(); + + auto y_chroma_location = deserialize_trivial_type(endpoint); + if
(!y_chroma_location) { forward_error(y_chroma_location); } + out.y_chroma_location_ = y_chroma_location.value(); + auto color = codec>::deserialize(endpoint); if (!color) { forward_error(color); } out.color_ = color.value(); diff --git a/include/holoscan/operators/holoviz/holoviz.hpp b/include/holoscan/operators/holoviz/holoviz.hpp index 7728153..83a033d 100644 --- a/include/holoscan/operators/holoviz/holoviz.hpp +++ b/include/holoscan/operators/holoviz/holoviz.hpp @@ -393,7 +393,7 @@ class HolovizOp : public Operator { * {component format}_{numeric format} * * - component format * - indicates the size in bits of the R, G, B and A components if present + * - indicates the size in bits of the R, G, B, A or Y, U, V components if present * - numeric format * - UNORM - unsigned normalize values, range [0, 1] * - SNORM - signed normalized values, range [-1,1] @@ -403,6 +403,12 @@ * - SRGB - the R, G, and B components are unsigned normalized values that * represent values using sRGB nonlinear encoding, while the A * component (if one exists) is a regular unsigned normalized value + * - multi-planar formats + * - 2PLANE - data is stored in two separate memory planes + * - 3PLANE - data is stored in three separate memory planes + * - YUV formats + * - 420 - the horizontal and vertical resolution of the chroma (UV) planes is halved + * - 422 - the horizontal resolution of the chroma (UV) planes is halved * * Note: this needs to match the viz::ImageFormat enum (except the AUTO_DETECT value). */ @@ -540,6 +546,88 @@ class HolovizOp : public Operator { /// and an 8-bit R component stored with sRGB nonlinear /// encoding in bits 0..7. + Y8U8Y8V8_422_UNORM, ///< specifies a four-component, 32-bit format containing a pair of Y + /// components, a V component, and a U component, collectively encoding a + /// 2×1 rectangle of unsigned normalized RGB texel data.
One Y value is + /// present at each i coordinate, with the U and V values shared across + /// both Y values and thus recorded at half the horizontal resolution of + /// the image. This format has an 8-bit Y component for the even i + /// coordinate in byte 0, an 8-bit U component in byte 1, an 8-bit Y + /// component for the odd i coordinate in byte 2, and an 8-bit V component + /// in byte 3. This format only supports images with a width that is a + /// multiple of two. + U8Y8V8Y8_422_UNORM, ///< specifies a four-component, 32-bit format containing a pair of Y + /// components, a V component, and a U component, collectively encoding a + /// 2×1 rectangle of unsigned normalized RGB texel data. One Y value is + /// present at each i coordinate, with the U and V values shared across + /// both Y values and thus recorded at half the horizontal resolution of + /// the image. This format has an 8-bit U component in byte 0, an 8-bit Y + /// component for the even i coordinate in byte 1, an 8-bit V component in + /// byte 2, and an 8-bit Y component for the odd i coordinate in byte 3. + /// This format only supports images with a width that is a multiple of + /// two. + Y8_U8V8_2PLANE_420_UNORM, ///< specifies an unsigned normalized multi-planar format that has an + /// 8-bit Y component in plane 0, and a two-component, 16-bit UV + /// plane 1 consisting of an 8-bit U component in byte 0 and an + /// 8-bit V component in byte 1. The horizontal and vertical + /// dimensions of the UV plane are halved relative to the image + /// dimensions. This format only supports images with a width and + /// height that are a multiple of two. + Y8_U8V8_2PLANE_422_UNORM, ///< specifies an unsigned normalized multi-planar format that has an + /// 8-bit Y component in plane 0, and a two-component, 16-bit UV + /// plane 1 consisting of an 8-bit U component in byte 0 and an + /// 8-bit V component in byte 1. 
The horizontal dimension of the UV + /// plane is halved relative to the image dimensions. This format + /// only supports images with a width that is a multiple of two. + Y8_U8_V8_3PLANE_420_UNORM, ///< specifies an unsigned normalized multi-planar format that has + ///< an + /// 8-bit Y component in plane 0, an 8-bit U component in plane 1, + /// and an 8-bit V component in plane 2. The horizontal and + /// vertical dimensions of the V and U planes are halved relative + /// to the image dimensions. This format only supports images with + /// a width and height that are a multiple of two. + Y8_U8_V8_3PLANE_422_UNORM, ///< specifies an unsigned normalized multi-planar format that has + ///< an + /// 8-bit Y component in plane 0, an 8-bit U component in plane 1, + /// and an 8-bit V component in plane 2. The horizontal dimension + /// of the V and U plane is halved relative to the image + /// dimensions. This format only supports images with a width that + /// is a multiple of two. + Y16_U16V16_2PLANE_420_UNORM, ///< specifies an unsigned normalized multi-planar format that has + ///< a + /// 16-bit Y component in each 16-bit word of plane 0, and a + /// two-component, 32-bit UV plane 1 consisting of a 16-bit U + /// component in the word in bytes 0..1, and a 16-bit V component + /// in the word in bytes 2..3. The horizontal and vertical + /// dimensions of the UV plane are halved relative to the image + /// dimensions. This format only supports images with a width and + /// height that are a multiple of two. + Y16_U16V16_2PLANE_422_UNORM, ///< specifies an unsigned normalized multi-planar format that has + ///< a + /// 16-bit Y component in each 16-bit word of plane 0, and a + /// two-component, 32-bit UV plane 1 consisting of a 16-bit U + /// component in the word in bytes 0..1, and a 16-bit V component + /// in the word in bytes 2..3. The horizontal dimension of the UV + /// plane is halved relative to the image dimensions. 
This format + /// only supports images with a width that is a multiple of two. + Y16_U16_V16_3PLANE_420_UNORM, ///< specifies an unsigned normalized multi-planar format that + ///< has + /// a 16-bit Y component in each 16-bit word of plane 0, a + /// 16-bit U component in each 16-bit word of plane 1, and a + /// 16-bit V component in each 16-bit word of plane 2. The + /// horizontal and vertical dimensions of the V and U planes are + /// halved relative to the image dimensions. This format only + /// supports images with a width and height that are a multiple + /// of two. + Y16_U16_V16_3PLANE_422_UNORM, ///< specifies an unsigned normalized multi-planar format that + ///< has + /// a 16-bit Y component in each 16-bit word of plane 0, a + /// 16-bit U component in each 16-bit word of plane 1, and a + /// 16-bit V component in each 16-bit word of plane 2. The + /// horizontal dimension of the V and U plane is halved relative + /// to the image dimensions. This format only supports images + /// with a width that is a multiple of two. + AUTO_DETECT = -1 ///< Auto detect the image format. If the input is a video buffer the format /// of the video buffer is used, if the input is a tensor then the format /// depends on the component count @@ -549,6 +637,36 @@ class HolovizOp : public Operator { /// and the component type. }; + /** + * Defines the conversion from the source color model to the shader color model. 
+ */ + enum class YuvModelConversion { + YUV_601, ///< specifies the color model conversion from YUV to RGB defined in BT.601 + YUV_709, ///< specifies the color model conversion from YUV to RGB defined in BT.709 + YUV_2020, ///< specifies the color model conversion from YUV to RGB defined in BT.2020 + }; + + /** + * Specifies the YUV range + */ + enum class YuvRange { + ITU_FULL, ///< specifies that the full range of the encoded values are valid and + ///< interpreted according to the ITU “full range” quantization rules + ITU_NARROW, ///< specifies that headroom and foot room are reserved in the numerical range + ///< of encoded values, and the remaining values are expanded according to the + ///< ITU “narrow range” quantization rules + }; + + /** + * Defines the location of downsampled chroma component samples relative to the luma samples. + */ + enum class ChromaLocation { + COSITED_EVEN, ///< specifies that downsampled chroma samples are aligned with luma samples with + ///< even coordinates + MIDPOINT, ///< specifies that downsampled chroma samples are located half way between each even + ///< luma sample and the nearest higher odd luma sample. + }; + /** * Depth map render mode. 
*/ @@ -589,9 +707,18 @@ class HolovizOp : public Operator { 0; ///< layer priority, determines the render order, layers with higher priority values are ///< rendered on top of layers with lower priority values ImageFormat image_format_ = ImageFormat::AUTO_DETECT; ///< image format - std::vector color_{1.f, 1.f, 1.f, 1.f}; ///< color of rendered geometry - float line_width_ = 1.f; ///< line width for geometry made of lines - float point_size_ = 1.f; ///< point size for geometry made of points + YuvModelConversion yuv_model_conversion_ = + YuvModelConversion::YUV_601; ///< YUV model conversion + YuvRange yuv_range_ = YuvRange::ITU_FULL; ///< YUV range + ChromaLocation x_chroma_location_ = + ChromaLocation::COSITED_EVEN; ///< chroma location in x direction for formats which are + ///< chroma downsampled in width (420 and 422) + ChromaLocation y_chroma_location_ = + ChromaLocation::COSITED_EVEN; ///< chroma location in y direction for formats which are + ///< chroma downsampled in height (420) + std::vector color_{1.f, 1.f, 1.f, 1.f}; ///< color of rendered geometry + float line_width_ = 1.f; ///< line width for geometry made of lines + float point_size_ = 1.f; ///< point size for geometry made of points std::vector text_; ///< array of text strings, used when type_ is TEXT. DepthMapRenderMode depth_map_render_mode_ = DepthMapRenderMode::POINTS; ///< depth map render mode, used if type_ is diff --git a/tests/system/ping_tensor_rx_op.hpp b/include/holoscan/operators/ping_tensor_rx/ping_tensor_rx.hpp similarity index 59% rename from tests/system/ping_tensor_rx_op.hpp rename to include/holoscan/operators/ping_tensor_rx/ping_tensor_rx.hpp index 0ef5af0..9fe7bbc 100644 --- a/tests/system/ping_tensor_rx_op.hpp +++ b/include/holoscan/operators/ping_tensor_rx/ping_tensor_rx.hpp @@ -15,17 +15,31 @@ * limitations under the License. 
*/ -#ifndef SYSTEM_PING_TENSOR_RX_OP_HPP -#define SYSTEM_PING_TENSOR_RX_OP_HPP +#ifndef HOLOSCAN_OPERATORS_PING_TENSOR_RX_PING_TENSOR_RX_HPP +#define HOLOSCAN_OPERATORS_PING_TENSOR_RX_PING_TENSOR_RX_HPP #include #include #include -namespace holoscan { -namespace ops { +namespace holoscan::ops { +/** + * @brief Simple tensor receive operator. + * + * This is an example of a native operator with one input port. + * + * This operator is intended for use in test cases and example applications. + * + * On each tick, it receives a TensorMap and loops over each tensor in the map. For each, it will + * print the tensor's name and shape. + * + * ==Named Inputs== + * + * - **in** : nvidia::gxf::Tensor(s) + * - One or more received tensors (i.e. a TensorMap). + */ class PingTensorRxOp : public Operator { public: HOLOSCAN_OPERATOR_FORWARD_ARGS(PingTensorRxOp) @@ -37,11 +51,9 @@ class PingTensorRxOp : public Operator { void compute(InputContext& op_input, OutputContext&, ExecutionContext&) override; private: - int count_ = 1; - Parameter tensor_name_; + size_t count_ = 1; }; -} // namespace ops -} // namespace holoscan +} // namespace holoscan::ops -#endif /* SYSTEM_PING_TENSOR_RX_OP_HPP */ +#endif /* HOLOSCAN_OPERATORS_PING_TENSOR_RX_PING_TENSOR_RX_HPP */ diff --git a/include/holoscan/operators/ping_tensor_tx/ping_tensor_tx.hpp b/include/holoscan/operators/ping_tensor_tx/ping_tensor_tx.hpp new file mode 100644 index 0000000..d34e80c --- /dev/null +++ b/include/holoscan/operators/ping_tensor_tx/ping_tensor_tx.hpp @@ -0,0 +1,102 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef HOLOSCAN_OPERATORS_PING_TENSOR_TX_HPP +#define HOLOSCAN_OPERATORS_PING_TENSOR_TX_HPP + +#include +#include + +#include + +namespace holoscan::ops { + +/** + * @brief Example Tensor transmitter operator. + * + * On each tick, it transmits a single tensor on the "out" port. + * + * This operator is intended for use in test cases and example applications. + * + * ==Named Outputs== + * + * - **out** : nvidia::gxf::Tensor + * - A generated 1D (H), 2D (HW), 3D (HWC) or 4D (NHWC) tensor (with uninitialized data values). + * Depending on the parameters set, this tensor can be in system memory, pinned host memory or + * device memory. Setting `batch_size`, `columns` or `channels` to 0 will omit the + * corresponding dimension. Notation used: N = batch, H = rows, W = columns, C = channels. + * + * ==Parameters== + * + * - **allocator**: The memory allocator to use. When not set, a default UnboundedAllocator is used. + * - **storage_type**: A string indicating where the memory should be allocated. Options are + * "system" (system/CPU memory), "host" (CUDA pinned host memory) or "device" (GPU memory). The + * `allocator` takes care of allocating memory of the indicated type. The default is "system". + * - **batch_size**: Size of the batch dimension of the generated tensor. If set to 0, this + * dimension is omitted. The default is 0. + * - **rows**: The number of rows in the generated tensor. This dimension must be >= 1. The default + * is 32. + * - **columns**: The number of columns in the generated tensor. 
If set to 0, this dimension is + * omitted. The default is 64. + * - **channels**: The number of channels in the generated tensor. If set to 0, this dimension is + * omitted. The default is 0. + * - **data_type**: A string representing the data type for the generated tensor. Must be one of + * "int8_t", "int16_t", "int32_t", "int64_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t", + * "float", "double", "complex". The default is "uint8_t". + * - **tensor_name**: The name of the generated tensor. The default name is "tensor". + * + * ==Device Memory Requirements== + * + * When using this operator with a `BlockMemoryPool`, the minimum `block_size` is + * `(batch_size * rows * columns * channels * element_size_bytes)` where `element_size_bytes` is + * the number of bytes for a single element of the specified `data_type`. Only a single memory + * block is used. + */ +class PingTensorTxOp : public Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(PingTensorTxOp) + + PingTensorTxOp() = default; + + void initialize() override; + void setup(OperatorSpec& spec) override; + void compute(InputContext&, OutputContext& op_output, ExecutionContext& context) override; + + nvidia::gxf::PrimitiveType element_type() { + if (element_type_.has_value()) { return element_type_.value(); } + element_type_ = primitive_type(data_type_.get()); + return element_type_.value(); + } + + private: + nvidia::gxf::PrimitiveType primitive_type(const std::string& data_type); + std::optional element_type_; + size_t count_ = 1; + + Parameter> allocator_{nullptr}; + Parameter storage_type_{"system"}; + Parameter batch_size_{0}; + Parameter rows_{32}; + Parameter columns_{64}; + Parameter channels_{0}; + Parameter data_type_{"uint8_t"}; + Parameter tensor_name_{"tensor"}; +}; + +} // namespace holoscan::ops + +#endif /* HOLOSCAN_OPERATORS_PING_TENSOR_TX_HPP */ diff --git a/include/holoscan/operators/v4l2_video_capture/v4l2_video_capture.hpp
b/include/holoscan/operators/v4l2_video_capture/v4l2_video_capture.hpp index f51ac0f..c47314e 100644 --- a/include/holoscan/operators/v4l2_video_capture/v4l2_video_capture.hpp +++ b/include/holoscan/operators/v4l2_video_capture/v4l2_video_capture.hpp @@ -33,12 +33,13 @@ namespace holoscan::ops { * * Inputs a video stream from a V4L2 node, including USB cameras and HDMI IN. * - Input stream is on host. If no pixel format is specified in the yaml configuration file, the - * pixel format will be automatically selected. However, only `AB24`, `YUYV`, and MJPG are then - * supported. + * pixel format will be automatically selected. However, only `AB24`, `YUYV`, `MJPG`, and `RGB3` + * are then supported. * If a pixel format is specified in the yaml file, then this format will be used. However, note - * that the operator then expects that this format can be encoded as RGBA32. If not, the behavior - * is undefined. - * - Output stream is on host. Always RGBA32 at this time. + * if `pass_through` is `False` that the operator then expects that this format can be encoded as + * RGBA32. If not, the behavior is undefined. + * - Output stream is on host. if `pass_through` is `False` (the default) the video buffer is + * converted to RGBA32, else output the input video buffer unmodified. * * Use `holoscan::ops::FormatConverterOp` to move data from the host to a GPU device. * @@ -58,6 +59,8 @@ namespace holoscan::ops { * - **num_buffers**: Number of V4L2 buffers to use. Optional (default: `4`). * - **pixel_format**: Video stream pixel format (little endian four character code (fourcc)). * Default value is `"auto"`. + * - **pass_through**: If set, pass through the input buffer to the output unmodified, else convert + * to RGBA32 (default `false`). * - **exposure_time**: Exposure time of the camera sensor in multiples of 100 μs (e.g. setting * exposure_time to 100 is 10 ms). Optional (default: auto exposure, or camera sensor default). 
* Use `v4l2-ctl -d /dev/ -L` for a range of values supported by your device. @@ -113,6 +116,7 @@ class V4L2VideoCaptureOp : public Operator { Parameter height_; Parameter num_buffers_; Parameter pixel_format_; + Parameter pass_through_; Parameter exposure_time_; Parameter gain_; @@ -127,9 +131,6 @@ class V4L2VideoCaptureOp : public Operator { void v4l2_start(); void v4l2_read_buffer(v4l2_buffer& buf); - void YUYVToRGBA(const void* yuyv, void* rgba, size_t width, size_t height); - void MJPEGToRGBA(const void* mjpg, void* rgba, size_t width, size_t height); - struct Buffer { void* ptr; size_t length; @@ -140,6 +141,9 @@ class V4L2VideoCaptureOp : public Operator { uint32_t width_use_{0}; uint32_t height_use_{0}; uint32_t pixel_format_use_{V4L2_PIX_FMT_RGBA32}; + + typedef void (*ConverterFunc)(const void* in, void* rgba, size_t width, size_t height); + ConverterFunc converter_{nullptr}; }; } // namespace holoscan::ops diff --git a/include/holoscan/operators/video_stream_replayer/video_stream_replayer.hpp b/include/holoscan/operators/video_stream_replayer/video_stream_replayer.hpp index b5fa5a2..fda43ec 100644 --- a/include/holoscan/operators/video_stream_replayer/video_stream_replayer.hpp +++ b/include/holoscan/operators/video_stream_replayer/video_stream_replayer.hpp @@ -15,16 +15,18 @@ * limitations under the License. 
*/ -#ifndef HOLOSCAN_OPERATORS_STREAM_PLAYBACK_VIDEO_STREAM_REPLAYER_HPP -#define HOLOSCAN_OPERATORS_STREAM_PLAYBACK_VIDEO_STREAM_REPLAYER_HPP +#ifndef HOLOSCAN_OPERATORS_VIDEO_STREAM_REPLAYER_VIDEO_STREAM_REPLAYER_HPP +#define HOLOSCAN_OPERATORS_VIDEO_STREAM_REPLAYER_VIDEO_STREAM_REPLAYER_HPP #include #include #include #include -#include "holoscan/core/gxf/gxf_operator.hpp" #include "gxf/serialization/file_stream.hpp" +#include "holoscan/core/conditions/gxf/boolean.hpp" +#include "holoscan/core/gxf/gxf_operator.hpp" +#include "holoscan/core/resources/gxf/allocator.hpp" namespace holoscan::ops { @@ -53,6 +55,12 @@ namespace holoscan::ops { * - **count**: Number of frame counts to playback. If zero value is specified, it is ignored. * If the count is less than the number of frames in the video, it would finish early. * Optional (default: `0`). + * - **allocator**: The allocator used for Tensor objects. Currently this can only use the default + * allocator type of `holoscan::UnboundedAllocator`. + * Optional (default: `holoscan::UnboundedAllocator`) + * - **entity_serializer**: The entity serializer used for deserialization. The default is to use + * a default-initialized ``holoscan::gxf::StdEntitySerializer``. If this argument is + * specified, then the `allocator` argument is ignored.
*/ class VideoStreamReplayerOp : public holoscan::Operator { public: @@ -70,6 +78,7 @@ class VideoStreamReplayerOp : public holoscan::Operator { private: Parameter transmitter_; + Parameter> allocator_; Parameter> entity_serializer_; Parameter> boolean_scheduling_term_; Parameter directory_; @@ -98,4 +107,4 @@ class VideoStreamReplayerOp : public holoscan::Operator { } // namespace holoscan::ops -#endif /* HOLOSCAN_OPERATORS_STREAM_PLAYBACK_VIDEO_STREAM_REPLAYER_HPP */ +#endif /* HOLOSCAN_OPERATORS_VIDEO_STREAM_REPLAYER_VIDEO_STREAM_REPLAYER_HPP */ diff --git a/include/holoscan/utils/cuda_macros.hpp b/include/holoscan/utils/cuda_macros.hpp new file mode 100644 index 0000000..4b1a1c5 --- /dev/null +++ b/include/holoscan/utils/cuda_macros.hpp @@ -0,0 +1,75 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef HOLOSCAN_UTILS_CUDA_MACROS_HPP +#define HOLOSCAN_UTILS_CUDA_MACROS_HPP + +#include + +#include // GXF_LOG_ERROR, GXF_LOG_WARNING + +// Note: ({ ... 
}) here is a GNU statement expression and not standard C++ +// see: https://gcc.gnu.org/onlinedocs/gcc/Statement-Exprs.html +// It should be supported by both clang and gcc, but maybe not by MSVC +#define HOLOSCAN_CUDA_CALL(stmt) \ + ({ \ + cudaError_t _holoscan_cuda_err = stmt; \ + if (cudaSuccess != _holoscan_cuda_err) { \ + GXF_LOG_ERROR("CUDA Runtime call %s in line %d of file %s failed with '%s' (%d).\n", \ + #stmt, \ + __LINE__, \ + __FILE__, \ + cudaGetErrorString(_holoscan_cuda_err), \ + _holoscan_cuda_err); \ + } \ + _holoscan_cuda_err; \ + }) + +#define HOLOSCAN_CUDA_CALL_WARN(stmt) \ + ({ \ + cudaError_t _holoscan_cuda_err = stmt; \ + if (cudaSuccess != _holoscan_cuda_err) { \ + GXF_LOG_WARNING("CUDA Runtime call %s in line %d of file %s failed with '%s' (%d).\n", \ + #stmt, \ + __LINE__, \ + __FILE__, \ + cudaGetErrorString(_holoscan_cuda_err), \ + _holoscan_cuda_err); \ + } \ + _holoscan_cuda_err; \ + }) + +#define HOLOSCAN_CUDA_CALL_WARN_MSG(stmt, ...) \ + ({ \ + cudaError_t _holoscan_cuda_err = HOLOSCAN_CUDA_CALL_WARN(stmt); \ + if (_holoscan_cuda_err != cudaSuccess) { HOLOSCAN_LOG_WARN(__VA_ARGS__); } \ + _holoscan_cuda_err; \ + }) + +#define HOLOSCAN_CUDA_CALL_ERR_MSG(stmt, ...) \ + ({ \ + cudaError_t _holoscan_cuda_err = HOLOSCAN_CUDA_CALL(stmt); \ + if (_holoscan_cuda_err != cudaSuccess) { HOLOSCAN_LOG_ERROR(__VA_ARGS__); } \ + _holoscan_cuda_err; \ + }) + +#define HOLOSCAN_CUDA_CALL_THROW_ERROR(stmt, ...) 
\ + do { \ + if (HOLOSCAN_CUDA_CALL(stmt) != cudaSuccess) { throw std::runtime_error(__VA_ARGS__); } \ + } while (0) + +#endif /* HOLOSCAN_UTILS_CUDA_MACROS_HPP */ diff --git a/modules/holoinfer/src/CMakeLists.txt b/modules/holoinfer/src/CMakeLists.txt index f95c408..4dd564e 100644 --- a/modules/holoinfer/src/CMakeLists.txt +++ b/modules/holoinfer/src/CMakeLists.txt @@ -20,6 +20,8 @@ project(holoinfer VERSION ${HOLOSCAN_BUILD_VERSION} LANGUAGES CXX CUDA) find_package(CUDAToolkit REQUIRED) if(HOLOSCAN_BUILD_LIBTORCH) + # It is necessary to set the TORCH_CUDA_ARCH_LIST explicitly for newer versions of Torch (circa v2.3.0) + set(TORCH_CUDA_ARCH_LIST "3.5 5.0 8.0 8.6 8.9 9.0") find_package(Torch REQUIRED) find_package(TorchVision REQUIRED) add_library(holoinfer_torch SHARED infer/torch/core.cpp) diff --git a/modules/holoviz/examples/demo/Main.cpp b/modules/holoviz/examples/demo/Main.cpp index b9af568..f22e592 100644 --- a/modules/holoviz/examples/demo/Main.cpp +++ b/modules/holoviz/examples/demo/Main.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -39,9 +40,12 @@ namespace viz = holoscan::viz; enum class Source { HOST, DEVICE, ARRAY }; static const char* source_items[]{"Host", "Device", "Array"}; -static const char* format_items[]{"R8_UINT", "R8G8B8_UNORM", "R8G8B8A8_UNORM"}; -static viz::ImageFormat formats[]{ - viz::ImageFormat::R8_UINT, viz::ImageFormat::R8G8B8_UNORM, viz::ImageFormat::R8G8B8A8_UNORM}; +static const char* format_items[]{ + "R8_UINT", "R8G8B8_UNORM", "R8G8B8A8_UNORM", "Y8_U8V8_2PLANE_420_UNORM"}; +static viz::ImageFormat formats[]{viz::ImageFormat::R8_UINT, + viz::ImageFormat::R8G8B8_UNORM, + viz::ImageFormat::R8G8B8A8_UNORM, + viz::ImageFormat::Y8_U8V8_2PLANE_420_UNORM}; // define the '<<' operators to get a nice output #define CASE(VALUE) \ @@ -91,18 +95,16 @@ std::chrono::milliseconds elapsed; uint32_t iterations = 0; float fps = 0.f; -// memory -std::unique_ptr host_mem_r8; -std::unique_ptr host_mem_r8g8b8; 
-std::unique_ptr host_mem_r8g8b8a8; - -std::vector palette; - // cuda CUcontext cuda_context = nullptr; -CUdeviceptr cu_device_mem_r8 = 0; -CUdeviceptr cu_device_mem_r8g8b8 = 0; -CUdeviceptr cu_device_mem_r8g8b8a8 = 0; + +// source data +struct SourceData { + std::array, 3> host_mems; + std::array cu_device_mems; +}; +std::array source_data; +std::vector palette; void tick() { if (start.time_since_epoch().count() == 0) { @@ -176,55 +178,34 @@ void tick() { viz::LayerOpacity(image_layer_opacity); viz::LayerPriority(image_layer_priority); - if ((formats[current_format_index] == viz::ImageFormat::R8G8B8_UNORM) || - (formats[current_format_index] == viz::ImageFormat::R8G8B8A8_UNORM)) { - // Color image - - // host memory - switch (current_source) { - case Source::HOST: { - const void* data = nullptr; - switch (formats[current_format_index]) { - case viz::ImageFormat::R8G8B8_UNORM: - data = host_mem_r8g8b8.get(); - break; - case viz::ImageFormat::R8G8B8A8_UNORM: - data = host_mem_r8g8b8a8.get(); - break; - default: - throw std::runtime_error("Unhandled image format"); - } - viz::ImageHost(width, height, formats[current_format_index], data); - break; - } - case Source::DEVICE: { - CUdeviceptr device_ptr = 0; - switch (formats[current_format_index]) { - case viz::ImageFormat::R8G8B8_UNORM: - device_ptr = cu_device_mem_r8g8b8; - break; - case viz::ImageFormat::R8G8B8A8_UNORM: - device_ptr = cu_device_mem_r8g8b8a8; - break; - default: - throw std::runtime_error("Unhandled image format"); - } - viz::ImageCudaDevice(width, height, formats[current_format_index], device_ptr); - break; - } - } - } else { + if (formats[current_format_index] == viz::ImageFormat::R8_UINT) { // Image with LUT viz::LUT(palette.size(), viz::ImageFormat::R8G8B8A8_UNORM, palette.size() * sizeof(uint32_t), palette.data()); + } - if (current_source == Source::DEVICE) { - viz::ImageCudaDevice(width, height, formats[current_format_index], cu_device_mem_r8); - } else { - viz::ImageHost(width, height, 
formats[current_format_index], host_mem_r8.get()); - } + if (current_source == Source::DEVICE) { + viz::ImageCudaDevice(width, + height, + formats[current_format_index], + source_data[current_format_index].cu_device_mems[0], + 0, + source_data[current_format_index].cu_device_mems[1], + 0, + source_data[current_format_index].cu_device_mems[2], + 0); + } else { + viz::ImageHost(width, + height, + formats[current_format_index], + source_data[current_format_index].host_mems[0].get(), + 0, + source_data[current_format_index].host_mems[1].get(), + 0, + source_data[current_format_index].host_mems[2].get(), + 0); } viz::EndLayer(); @@ -362,19 +343,13 @@ void initCuda() { } void cleanupCuda() { - if (cu_device_mem_r8) { - if (cuMemFree(cu_device_mem_r8) != CUDA_SUCCESS) { - throw std::runtime_error("cuMemFree failed."); - } - } - if (cu_device_mem_r8g8b8) { - if (cuMemFree(cu_device_mem_r8g8b8) != CUDA_SUCCESS) { - throw std::runtime_error("cuMemFree failed."); - } - } - if (cu_device_mem_r8g8b8a8) { - if (cuMemFree(cu_device_mem_r8g8b8a8) != CUDA_SUCCESS) { - throw std::runtime_error("cuMemFree failed."); + for (auto&& source : source_data) { + for (auto&& cu_device_mem : source.cu_device_mems) { + if (cu_device_mem) { + if (cuMemFree(cu_device_mem) != CUDA_SUCCESS) { + throw std::runtime_error("cuMemFree failed."); + } + } } } if (cuda_context) { @@ -399,61 +374,107 @@ void loadImage() { 0); if (!image_data) { throw std::runtime_error("Loading image failed."); } + const uint32_t row_pitch = width * components; + + // for YUV textures width and height must be a multiple of 2 + width &= ~1; + height &= ~1; + // allocate and set host memory - host_mem_r8.reset(new uint8_t[width * height]); - host_mem_r8g8b8.reset(new uint8_t[width * height * 3]); - host_mem_r8g8b8a8.reset(new uint8_t[width * height * 4]); - - uint8_t const* src = image_data; - - uint8_t* dst_r8 = host_mem_r8.get(); - uint8_t* dst_r8g8b8a8 = host_mem_r8g8b8a8.get(); - uint8_t* dst_r8g8b8 = 
host_mem_r8g8b8.get(); - for (uint32_t i = 0; i < width * height; ++i) { - dst_r8g8b8[0] = src[0]; - dst_r8g8b8[1] = src[1]; - dst_r8g8b8[2] = src[2]; - dst_r8g8b8 += 3; - - dst_r8g8b8a8[0] = src[0]; - dst_r8g8b8a8[1] = src[1]; - dst_r8g8b8a8[2] = src[2]; - dst_r8g8b8a8[3] = (components == 4) ? src[3] : 0xFF; - const uint32_t pixel = *reinterpret_cast(dst_r8g8b8a8); - dst_r8g8b8a8 += 4; - - std::vector::iterator it = std::find(palette.begin(), palette.end(), pixel); - if (it == palette.end()) { - palette.push_back(pixel); - it = --palette.end(); - } - dst_r8[0] = std::distance(palette.begin(), it); - dst_r8 += 1; + source_data[0].host_mems[0].reset(new uint8_t[width * height]); + source_data[1].host_mems[0].reset(new uint8_t[width * height * 3]); + source_data[2].host_mems[0].reset(new uint8_t[width * height * 4]); + source_data[3].host_mems[0].reset(new uint8_t[width * height]); + source_data[3].host_mems[1].reset(new uint8_t[(width / 2) * (height / 2) * 2]); + + uint8_t* dst_r8 = source_data[0].host_mems[0].get(); + uint8_t* dst_r8g8b8 = source_data[1].host_mems[0].get(); + uint8_t* dst_r8g8b8a8 = source_data[2].host_mems[0].get(); + uint8_t* dst_y8 = source_data[3].host_mems[0].get(); + uint8_t* dst_u8v8 = source_data[3].host_mems[1].get(); + for (uint32_t y = 0; y < height; ++y) { + uint8_t const* src = &image_data[y * row_pitch]; + for (uint32_t x = 0; x < width; ++x) { + const uint8_t r = src[0]; + const uint8_t g = src[1]; + const uint8_t b = src[2]; + + dst_r8g8b8[0] = r; + dst_r8g8b8[1] = g; + dst_r8g8b8[2] = b; + dst_r8g8b8 += 3; + + dst_r8g8b8a8[0] = r; + dst_r8g8b8a8[1] = g; + dst_r8g8b8a8[2] = b; + dst_r8g8b8a8[3] = (components == 4) ? 
src[3] : 0xFF; + const uint32_t pixel = *reinterpret_cast(dst_r8g8b8a8); + dst_r8g8b8a8 += 4; + + std::vector::iterator it = std::find(palette.begin(), palette.end(), pixel); + if (it == palette.end()) { + palette.push_back(pixel); + it = --palette.end(); + } + dst_r8[0] = std::distance(palette.begin(), it); + dst_r8 += 1; + + // BT.601 full range RGB -> YUV + dst_y8[0] = (0.f + (0.299f * r) + (0.587f * g) + (0.114 * b)) + 0.5f; + dst_y8 += 1; - src += components; + if (!(x & 1) && !(y & 1)) { + dst_u8v8[0] = (128.f - (0.168736f * r) - (0.331264f * g) + (0.5f * b)) + 0.5f; + dst_u8v8[1] = (128.f + (0.5f * r) - (0.418688f * g) - (0.081312f * b)) + 0.5f; + dst_u8v8 += 2; + } + + src += components; + } } stbi_image_free(image_data); // allocate and set device memory - if (cuMemAlloc(&cu_device_mem_r8, width * height) != CUDA_SUCCESS) { + if (cuMemAlloc(&source_data[0].cu_device_mems[0], width * height) != CUDA_SUCCESS) { throw std::runtime_error("cuMemAlloc failed."); } - if (cuMemcpyHtoD(cu_device_mem_r8, host_mem_r8.get(), width * height) != CUDA_SUCCESS) { + if (cuMemcpyHtoD(source_data[0].cu_device_mems[0], + source_data[0].host_mems[0].get(), + width * height) != CUDA_SUCCESS) { throw std::runtime_error("cuMemcpyHtoD failed."); } - if (cuMemAlloc(&cu_device_mem_r8g8b8, width * height * 3) != CUDA_SUCCESS) { + if (cuMemAlloc(&source_data[1].cu_device_mems[0], width * height * 3) != CUDA_SUCCESS) { throw std::runtime_error("cuMemAlloc failed."); } - if (cuMemcpyHtoD(cu_device_mem_r8g8b8, host_mem_r8g8b8.get(), width * height * 3) != - CUDA_SUCCESS) { + if (cuMemcpyHtoD(source_data[1].cu_device_mems[0], + source_data[1].host_mems[0].get(), + width * height * 3) != CUDA_SUCCESS) { throw std::runtime_error("cuMemcpyHtoD failed."); } - if (cuMemAlloc(&cu_device_mem_r8g8b8a8, width * height * 4) != CUDA_SUCCESS) { + if (cuMemAlloc(&source_data[2].cu_device_mems[0], width * height * 4) != CUDA_SUCCESS) { throw std::runtime_error("cuMemAlloc failed."); } - if 
(cuMemcpyHtoD(cu_device_mem_r8g8b8a8, host_mem_r8g8b8a8.get(), width * height * 4) != + if (cuMemcpyHtoD(source_data[2].cu_device_mems[0], + source_data[2].host_mems[0].get(), + width * height * 4) != CUDA_SUCCESS) { + throw std::runtime_error("cuMemcpyHtoD failed."); + } + if (cuMemAlloc(&source_data[3].cu_device_mems[0], width * height) != CUDA_SUCCESS) { + throw std::runtime_error("cuMemAlloc failed."); + } + if (cuMemcpyHtoD(source_data[3].cu_device_mems[0], + source_data[3].host_mems[0].get(), + width * height) != CUDA_SUCCESS) { + throw std::runtime_error("cuMemcpyHtoD failed."); + } + if (cuMemAlloc(&source_data[3].cu_device_mems[1], (width / 2) * (height / 2) * 2) != CUDA_SUCCESS) { + throw std::runtime_error("cuMemAlloc failed."); + } + if (cuMemcpyHtoD(source_data[3].cu_device_mems[1], + source_data[3].host_mems[1].get(), + (width / 2) * (height / 2) * 2) != CUDA_SUCCESS) { throw std::runtime_error("cuMemcpyHtoD failed."); } } @@ -570,8 +591,8 @@ int main(int argc, char** argv) { if (benchmark_mode) { for (auto source : {Source::DEVICE, Source::HOST}) { current_source = source; - for (auto format_index : {2, 1, 0}) { - current_format_index = format_index; + for (current_format_index = 0; current_format_index < IM_ARRAYSIZE(format_items); + ++current_format_index) { start = std::chrono::steady_clock::time_point(); do { tick(); } while (elapsed.count() < 2000); std::cout << current_source << " " << format_items[current_format_index] << " " diff --git a/modules/holoviz/src/cuda/cuda_service.hpp b/modules/holoviz/src/cuda/cuda_service.hpp index dd7a826..292eacd 100644 --- a/modules/holoviz/src/cuda/cuda_service.hpp +++ b/modules/holoviz/src/cuda/cuda_service.hpp @@ -32,15 +32,15 @@ namespace holoscan::viz { /** * CUDA runtime API error check helper */ -#define CudaRTCheck(FUNC) \ - { \ - const cudaError_t result = FUNC; \ - if (result != cudaSuccess) { \ - std::stringstream buf; \ - buf << "[" << __FILE__ << ":" << __LINE__ << "] CUDA driver error " << result 
<< " (" \ - << cudaGetErrorName(result) << "): " << cudaGetErrorString(result); \ - throw std::runtime_error(buf.str().c_str()); \ - } \ +#define CudaRTCheck(FUNC) \ + { \ + const cudaError_t result = FUNC; \ + if (result != cudaSuccess) { \ + std::stringstream buf; \ + buf << "[" << __FILE__ << ":" << __LINE__ << "] CUDA runtime error " << result << " (" \ + << cudaGetErrorName(result) << "): " << cudaGetErrorString(result); \ + throw std::runtime_error(buf.str().c_str()); \ + } \ } /** diff --git a/modules/holoviz/src/export.map b/modules/holoviz/src/export.map index d710be9..2f4fc52 100644 --- a/modules/holoviz/src/export.map +++ b/modules/holoviz/src/export.map @@ -46,11 +46,14 @@ "holoscan::viz::End()"; "holoscan::viz::BeginImageLayer()"; - "holoscan::viz::ImageCudaDevice(unsigned int, unsigned int, holoscan::viz::ImageFormat, unsigned long long, unsigned long)"; + "holoscan::viz::ImageCudaDevice(unsigned int, unsigned int, holoscan::viz::ImageFormat, unsigned long long, unsigned long, unsigned long long, unsigned long, unsigned long long, unsigned long)"; "holoscan::viz::ImageCudaArray(holoscan::viz::ImageFormat, CUarray_st*)"; - "holoscan::viz::ImageHost(unsigned int, unsigned int, holoscan::viz::ImageFormat, void const*, unsigned long)"; + "holoscan::viz::ImageHost(unsigned int, unsigned int, holoscan::viz::ImageFormat, void const*, unsigned long, void const*, unsigned long, void const*, unsigned long)"; "holoscan::viz::LUT(unsigned int, holoscan::viz::ImageFormat, unsigned long, void const*, bool)"; "holoscan::viz::ImageComponentMapping(holoscan::viz::ComponentSwizzle, holoscan::viz::ComponentSwizzle, holoscan::viz::ComponentSwizzle, holoscan::viz::ComponentSwizzle)"; + "holoscan::viz::ImageYuvModelConversion(holoscan::viz::YuvModelConversion)"; + "holoscan::viz::ImageYuvRange(holoscan::viz::YuvRange)"; + "holoscan::viz::ImageChromaLocation(holoscan::viz::ChromaLocation, holoscan::viz::ChromaLocation)"; "holoscan::viz::BeginImGuiLayer()"; diff --git 
a/modules/holoviz/src/holoviz.cpp b/modules/holoviz/src/holoviz.cpp index 0a97ed6..85146fa 100644 --- a/modules/holoviz/src/holoviz.cpp +++ b/modules/holoviz/src/holoviz.cpp @@ -147,17 +147,37 @@ void BeginImageLayer() { Context::get().begin_image_layer(); } -void ImageCudaDevice(uint32_t w, uint32_t h, ImageFormat fmt, CUdeviceptr device_ptr, - size_t row_pitch) { - Context::get().get_active_image_layer()->image_cuda_device(w, h, fmt, device_ptr, row_pitch); +void ImageCudaDevice(uint32_t width, uint32_t height, ImageFormat fmt, + CUdeviceptr device_ptr_plane_0, size_t row_pitch_plane_0, + CUdeviceptr device_ptr_plane_1, size_t row_pitch_plane_1, + CUdeviceptr device_ptr_plane_2, size_t row_pitch_plane_2) { + Context::get().get_active_image_layer()->image_cuda_device(width, + height, + fmt, + device_ptr_plane_0, + row_pitch_plane_0, + device_ptr_plane_1, + row_pitch_plane_1, + device_ptr_plane_2, + row_pitch_plane_2); } void ImageCudaArray(ImageFormat fmt, CUarray array) { Context::get().get_active_image_layer()->image_cuda_array(fmt, array); } -void ImageHost(uint32_t w, uint32_t h, ImageFormat fmt, const void* data, size_t row_pitch) { - Context::get().get_active_image_layer()->image_host(w, h, fmt, data, row_pitch); +void ImageHost(uint32_t w, uint32_t h, ImageFormat fmt, const void* data_plane_0, + size_t row_pitch_plane_0, const void* data_plane_1, size_t row_pitch_plane_1, + const void* data_plane_2, size_t row_pitch_plane_2) { + Context::get().get_active_image_layer()->image_host(w, + h, + fmt, + data_plane_0, + row_pitch_plane_0, + data_plane_1, + row_pitch_plane_1, + data_plane_2, + row_pitch_plane_2); } void LUT(uint32_t size, ImageFormat fmt, size_t data_size, const void* data, bool normalized) { @@ -169,6 +189,19 @@ void ImageComponentMapping(ComponentSwizzle r, ComponentSwizzle g, ComponentSwiz Context::get().get_active_image_layer()->image_component_mapping(r, g, b, a); } +void ImageYuvModelConversion(YuvModelConversion yuv_model_conversion) { + 
Context::get().get_active_image_layer()->image_yuv_model_conversion(yuv_model_conversion); +} + +void ImageYuvRange(YuvRange yuv_range) { + Context::get().get_active_image_layer()->image_yuv_range(yuv_range); +} + +void ImageChromaLocation(ChromaLocation x_chroma_location, ChromaLocation y_chroma_location) { + Context::get().get_active_image_layer()->image_chroma_location(x_chroma_location, + y_chroma_location); +} + void BeginImGuiLayer() { Context::get().begin_im_gui_layer(); } diff --git a/modules/holoviz/src/holoviz/holoviz.hpp b/modules/holoviz/src/holoviz/holoviz.hpp index 12ea4e6..5a5d844 100644 --- a/modules/holoviz/src/holoviz/holoviz.hpp +++ b/modules/holoviz/src/holoviz/holoviz.hpp @@ -301,15 +301,26 @@ void BeginImageLayer(); * the same layer. This enables depth-compositing image layers with other Holoviz layers. * Supported depth formats are: D16_UNORM, X8_D24_UNORM, D32_SFLOAT. * + * Supports multi-planar images (e.g. YUV), `device_ptr` and `row_pitch` specify the parameters + * for the first plane (plane 0), `device_ptr_n` and `row_pitch_n` for subsequent planes. 
+ * * @param width width of the image * @param height height of the image * @param fmt image format * @param device_ptr CUDA device memory pointer - * @param row_pitch the number of bytes between each row, if zero then data is assumed to be - * contiguous in memory + * @param row_pitch the number of bytes between each row, if zero then data is + * assumed to be contiguous in memory + * @param device_ptr_plane_1 CUDA device memory pointer for plane 1 + * @param row_pitch_plane_1 the number of bytes between each row for plane 1, if zero then data is + * assumed to be contiguous in memory + * @param device_ptr_plane_2 CUDA device memory pointer for plane 2 + * @param row_pitch_plane_2 the number of bytes between each row for plane 2, if zero then data is + * assumed to be contiguous in memory */ void ImageCudaDevice(uint32_t width, uint32_t height, ImageFormat fmt, CUdeviceptr device_ptr, - size_t row_pitch = 0); + size_t row_pitch = 0, CUdeviceptr device_ptr_plane_1 = 0, + size_t row_pitch_plane_1 = 0, CUdeviceptr device_ptr_plane_2 = 0, + size_t row_pitch_plane_2 = 0); /** * Defines the image data for this layer, source is a CUDA array. @@ -336,15 +347,26 @@ void ImageCudaArray(ImageFormat fmt, CUarray array); * the same layer. This enables depth-compositing image layers with other Holoviz layers. * Supported depth formats are: D16_UNORM, X8_D24_UNORM, D32_SFLOAT. * + * Supports multi-planar images (e.g. YUV), `data` and `row_pitch` specify the parameters + * for the first plane (plane 0), `data_plane_n` and `row_pitch_plane_n` for subsequent planes. 
+ * * @param width width of the image * @param height height of the image * @param fmt image format * @param data host memory pointer * @param row_pitch the number of bytes between each row, if zero then data is assumed to be * contiguous in memory + * @param data_plane_1 host memory pointer for plane 1 + * @param row_pitch_plane_1 the number of bytes between each row for plane 1, if zero then data is + * assumed to be contiguous in memory + * @param data_plane_2 host memory pointer for plane 2 + * @param row_pitch_plane_2 the number of bytes between each row for plane 2, if zero then data is + * assumed to be contiguous in memory */ void ImageHost(uint32_t width, uint32_t height, ImageFormat fmt, const void* data, - size_t row_pitch = 0); + size_t row_pitch = 0, const void* data_plane_1 = nullptr, + size_t row_pitch_plane_1 = 0, const void* data_plane_2 = nullptr, + size_t row_pitch_plane_2 = 0); /** * Defines the lookup table for this image layer. @@ -402,6 +424,30 @@ void LUT(uint32_t size, ImageFormat fmt, size_t data_size, const void* data, void ImageComponentMapping(ComponentSwizzle r, ComponentSwizzle g, ComponentSwizzle b, ComponentSwizzle a); +/** + * Specifies the YUV model conversion. + * + * @param yuv_model_conversion YUV model conversion. Default is `YUV_601`. + */ +void ImageYuvModelConversion(YuvModelConversion yuv_model_conversion); + +/** + * Specifies the YUV range. + * + * @param yuv_range YUV range. Default is `ITU_FULL`. + */ +void ImageYuvRange(YuvRange yuv_range); + +/** + * Defines the location of downsampled chroma component samples relative to the luma samples. + * + * @param x_chroma_location chroma location in x direction for formats which are chroma downsampled + * in width (420 and 422). Default is `COSITED_EVEN`. + * @param y_chroma_location chroma location in y direction for formats which are chroma downsampled + * in height (420). Default is `COSITED_EVEN`. 
+ */ +void ImageChromaLocation(ChromaLocation x_chroma_location, ChromaLocation y_chroma_location); + /** * Start a ImGUI layer. * diff --git a/modules/holoviz/src/holoviz/image_format.hpp b/modules/holoviz/src/holoviz/image_format.hpp index a99138c..b787938 100644 --- a/modules/holoviz/src/holoviz/image_format.hpp +++ b/modules/holoviz/src/holoviz/image_format.hpp @@ -38,6 +38,12 @@ namespace holoscan::viz { * - SRGB - the R, G, and B components are unsigned normalized values that * represent values using sRGB nonlinear encoding, while the A * component (if one exists) is a regular unsigned normalized value + * - multi-planar formats + * - 2PLANE - data is stored in two separate memory planes + * - 3PLANE - data is stored in three separate memory planes + * - YUV formats + * - 420 - the horizontal and vertical resolution of the chroma (UV) planes is halved + * - 422 - the horizontal of the chroma (UV) planes is halved */ enum class ImageFormat { R8_UINT, ///< specifies a one-component, 8-bit unsigned integer format that has @@ -171,6 +177,79 @@ enum class ImageFormat { /// in bits 8..15, /// and an 8-bit R component stored with sRGB nonlinear /// encoding in bits 0..7. + + Y8U8Y8V8_422_UNORM, ///< specifies a four-component, 32-bit format containing a pair of Y + /// components, a V component, and a U component, collectively encoding a + /// 2×1 rectangle of unsigned normalized RGB texel data. One Y value is + /// present at each i coordinate, with the U and V values shared across both + /// Y values and thus recorded at half the horizontal resolution of the + /// image. This format has an 8-bit Y component for the even i coordinate in + /// byte 0, an 8-bit U component in byte 1, an 8-bit Y component for the odd + /// i coordinate in byte 2, and an 8-bit V component in byte 3. This format + /// only supports images with a width that is a multiple of two. 
+ U8Y8V8Y8_422_UNORM, ///< specifies a four-component, 32-bit format containing a pair of Y + /// components, a V component, and a U component, collectively encoding a + /// 2×1 rectangle of unsigned normalized RGB texel data. One Y value is + /// present at each i coordinate, with the U and V values shared across both + /// Y values and thus recorded at half the horizontal resolution of the + /// image. This format has an 8-bit U component in byte 0, an 8-bit Y + /// component for the even i coordinate in byte 1, an 8-bit V component in + /// byte 2, and an 8-bit Y component for the odd i coordinate in byte 3. + /// This format only supports images with a width that is a multiple of two. + Y8_U8V8_2PLANE_420_UNORM, ///< specifies an unsigned normalized multi-planar format that has an + /// 8-bit Y component in plane 0, and a two-component, 16-bit UV plane + /// 1 consisting of an 8-bit U component in byte 0 and an 8-bit V + /// component in byte 1. The horizontal and vertical dimensions of the + /// UV plane are halved relative to the image dimensions. This format + /// only supports images with a width and height that are a multiple + /// of two. + Y8_U8V8_2PLANE_422_UNORM, ///< specifies an unsigned normalized multi-planar format that has an + /// 8-bit Y component in plane 0, and a two-component, 16-bit UV plane + /// 1 consisting of an 8-bit U component in byte 0 and an 8-bit V + /// component in byte 1. The horizontal dimension of the UV plane is + /// halved relative to the image dimensions. This format only supports + /// images with a width that is a multiple of two. + Y8_U8_V8_3PLANE_420_UNORM, ///< specifies an unsigned normalized multi-planar format that has an + /// 8-bit Y component in plane 0, an 8-bit U component in plane 1, + /// and an 8-bit V component in plane 2. The horizontal and vertical + /// dimensions of the V and U planes are halved relative to the image + /// dimensions. 
This format only supports images with a width and + /// height that are a multiple of two. + Y8_U8_V8_3PLANE_422_UNORM, ///< specifies an unsigned normalized multi-planar format that has an + /// 8-bit Y component in plane 0, an 8-bit U component in plane 1, + /// and an 8-bit V component in plane 2. The horizontal dimension of + /// the V and U plane is halved relative to the image dimensions. + /// This format only supports images with a width that is a multiple + /// of two. + Y16_U16V16_2PLANE_420_UNORM, ///< specifies an unsigned normalized multi-planar format that has a + /// 16-bit Y component in each 16-bit word of plane 0, and a + /// two-component, 32-bit UV plane 1 consisting of a 16-bit U + /// component in the word in bytes 0..1, and a 16-bit V component + /// in the word in bytes 2..3. The horizontal and vertical + /// dimensions of the UV plane are halved relative to the image + /// dimensions. This format only supports images with a width and + /// height that are a multiple of two. + Y16_U16V16_2PLANE_422_UNORM, ///< specifies an unsigned normalized multi-planar format that has a + /// 16-bit Y component in each 16-bit word of plane 0, and a + /// two-component, 32-bit UV plane 1 consisting of a 16-bit U + /// component in the word in bytes 0..1, and a 16-bit V component + /// in the word in bytes 2..3. The horizontal dimension of the UV + /// plane is halved relative to the image dimensions. This format + /// only supports images with a width that is a multiple of two. + Y16_U16_V16_3PLANE_420_UNORM, ///< specifies an unsigned normalized multi-planar format that has + /// a 16-bit Y component in each 16-bit word of plane 0, a 16-bit + /// U component in each 16-bit word of plane 1, and a 16-bit V + /// component in each 16-bit word of plane 2. The horizontal and + /// vertical dimensions of the V and U planes are halved relative + /// to the image dimensions. 
This format only supports images with + /// a width and height that are a multiple of two. + Y16_U16_V16_3PLANE_422_UNORM, ///< specifies an unsigned normalized multi-planar format that has + /// a 16-bit Y component in each 16-bit word of plane 0, a 16-bit + /// U component in each 16-bit word of plane 1, and a 16-bit V + /// component in each 16-bit word of plane 2. The horizontal + /// dimension of the V and U plane is halved relative to the image + /// dimensions. This format only supports images with a width that + /// is a multiple of two. }; /** @@ -195,6 +274,36 @@ enum class ComponentSwizzle { A ///< specifies that the component is set to the value of the A component of the image }; +/** + * Defines the conversion from the source color model to the shader color model. + */ +enum class YuvModelConversion { + YUV_601, ///< specifies the color model conversion from YUV to RGB defined in BT.601 + YUV_709, ///< specifies the color model conversion from YUV to RGB defined in BT.709 + YUV_2020, ///< specifies the color model conversion from YUV to RGB defined in BT.2020 +}; + +/** + * Specifies the YUV range + */ +enum class YuvRange { + ITU_FULL, ///< specifies that the full range of the encoded values are valid and + ///< interpreted according to the ITU “full range” quantization rules + ITU_NARROW, ///< specifies that headroom and foot room are reserved in the numerical range + ///< of encoded values, and the remaining values are expanded according to the + ///< ITU “narrow range” quantization rules +}; + +/** + * Defines the location of downsampled chroma component samples relative to the luma samples + */ +enum class ChromaLocation { + COSITED_EVEN, ///< specifies that downsampled chroma samples are aligned with luma samples with + ///< even coordinates + MIDPOINT, ///< specifies that downsampled chroma samples are located half way between each even + ///< luma sample and the nearest higher odd luma sample. 
+}; + } // namespace holoscan::viz #endif /* MODULES_HOLOVIZ_SRC_HOLOVIZ_IMAGE_FORMAT_HPP */ diff --git a/modules/holoviz/src/layers/geometry_layer.cpp b/modules/holoviz/src/layers/geometry_layer.cpp index f2d6bdb..196814c 100644 --- a/modules/holoviz/src/layers/geometry_layer.cpp +++ b/modules/holoviz/src/layers/geometry_layer.cpp @@ -187,41 +187,24 @@ class GeometryLayer::Impl { std::list depth_maps_; // internal state - Vulkan* vulkan_ = nullptr; - float aspect_ratio_ = 1.f; size_t vertex_count_ = 0; - Buffer* vertex_buffer_ = nullptr; + std::unique_ptr vertex_buffer_; std::unique_ptr text_draw_list_; - Buffer* text_vertex_buffer_ = nullptr; - Buffer* text_index_buffer_ = nullptr; + std::unique_ptr text_vertex_buffer_; + std::unique_ptr text_index_buffer_; size_t depth_map_vertex_count_ = 0; - Buffer* depth_map_vertex_buffer_ = nullptr; - Buffer* depth_map_index_buffer_ = nullptr; - Buffer* depth_map_color_buffer_ = nullptr; + std::unique_ptr depth_map_vertex_buffer_; + std::unique_ptr depth_map_index_buffer_; + std::unique_ptr depth_map_color_buffer_; }; GeometryLayer::GeometryLayer() : Layer(Type::Geometry), impl_(new GeometryLayer::Impl) {} -GeometryLayer::~GeometryLayer() { - if (impl_->vulkan_) { - if (impl_->vertex_buffer_) { impl_->vulkan_->destroy_buffer(impl_->vertex_buffer_); } - if (impl_->text_vertex_buffer_) { impl_->vulkan_->destroy_buffer(impl_->text_vertex_buffer_); } - if (impl_->text_index_buffer_) { impl_->vulkan_->destroy_buffer(impl_->text_index_buffer_); } - if (impl_->depth_map_vertex_buffer_) { - impl_->vulkan_->destroy_buffer(impl_->depth_map_vertex_buffer_); - } - if (impl_->depth_map_index_buffer_) { - impl_->vulkan_->destroy_buffer(impl_->depth_map_index_buffer_); - } - if (impl_->depth_map_color_buffer_) { - impl_->vulkan_->destroy_buffer(impl_->depth_map_color_buffer_); - } - } -} +GeometryLayer::~GeometryLayer() {} void GeometryLayer::color(float r, float g, float b, float a) { impl_->attributes_.color_[0] = r; @@ -372,14 +355,8 @@ 
void GeometryLayer::end(Vulkan* vulkan) { impl_->aspect_ratio_ = vulkan->get_window()->get_aspect_ratio(); impl_->text_draw_list_.reset(); - if (impl_->text_vertex_buffer_) { - impl_->vulkan_->destroy_buffer(impl_->text_vertex_buffer_); - impl_->text_vertex_buffer_ = nullptr; - } - if (impl_->text_index_buffer_) { - impl_->vulkan_->destroy_buffer(impl_->text_index_buffer_); - impl_->text_index_buffer_ = nullptr; - } + impl_->text_vertex_buffer_.reset(); + impl_->text_index_buffer_.reset(); // only crosses depend on the aspect ratio bool has_crosses = false; @@ -389,20 +366,11 @@ void GeometryLayer::end(Vulkan* vulkan) { break; } } - if (has_crosses) { - if (impl_->vertex_buffer_) { - impl_->vulkan_->destroy_buffer(impl_->vertex_buffer_); - impl_->vertex_buffer_ = nullptr; - } - } + if (has_crosses) { impl_->vertex_buffer_.reset(); } } if (!impl_->primitives_.empty()) { if (!impl_->vertex_buffer_) { - /// @todo need to remember Vulkan instance for destroying buffer, - /// destroy should probably be handled by Vulkan class - impl_->vulkan_ = vulkan; - // setup the vertex buffer std::vector vertices; vertices.reserve(impl_->vertex_count_ * 3); @@ -512,10 +480,6 @@ void GeometryLayer::end(Vulkan* vulkan) { // text might be completely out of clip rectangle, // if this is the case no vertices had been generated if (impl_->text_draw_list_->VtxBuffer.size() != 0) { - /// @todo need to remember Vulkan instance for destroying buffer, destroy should - // probably be handled by Vulkan class - impl_->vulkan_ = vulkan; - impl_->text_vertex_buffer_ = vulkan->create_buffer(impl_->text_draw_list_->VtxBuffer.size() * sizeof(ImDrawVert), impl_->text_draw_list_->VtxBuffer.Data, @@ -533,10 +497,6 @@ void GeometryLayer::end(Vulkan* vulkan) { if (!impl_->depth_maps_.empty()) { // allocate vertex buffer if (!impl_->depth_map_vertex_buffer_) { - /// @todo need to remember Vulkan instance for destroying buffer, destroy should probably be - /// handled by Vulkan class - impl_->vulkan_ = 
vulkan; - // calculate the index count needed size_t index_count = 0; bool has_color_buffer = false; @@ -569,9 +529,8 @@ void GeometryLayer::end(Vulkan* vulkan) { if (index_count) { // generate index data - impl_->depth_map_index_buffer_ = - vulkan->create_buffer_for_cuda_interop(index_count * sizeof(uint32_t), - vk::BufferUsageFlagBits::eIndexBuffer); + impl_->depth_map_index_buffer_ = vulkan->create_buffer_for_cuda_interop( + index_count * sizeof(uint32_t), vk::BufferUsageFlagBits::eIndexBuffer); CudaService* const cuda_service = vulkan->get_cuda_service(); const CudaService::ScopedPush cuda_context = cuda_service->PushContext(); @@ -583,11 +542,8 @@ void GeometryLayer::end(Vulkan* vulkan) { for (auto&& depth_map : impl_->depth_maps_) { depth_map.index_offset_ = offset; const CUdeviceptr dst = impl_->depth_map_index_buffer_->device_ptr_.get() + offset; - offset += GenDepthMapIndices(depth_map.render_mode_, - depth_map.width_, - depth_map.height_, - dst, - stream); + offset += GenDepthMapIndices( + depth_map.render_mode_, depth_map.width_, depth_map.height_, dst, stream); } // indicate that the index buffer had been used by CUDA impl_->depth_map_index_buffer_->end_access_with_cuda(stream); @@ -640,7 +596,7 @@ void GeometryLayer::end(Vulkan* vulkan) { if (depth_map.color_device_ptr_) { vulkan->upload_to_buffer(size, depth_map.color_device_ptr_, - impl_->depth_map_color_buffer_, + impl_->depth_map_color_buffer_.get(), offset, depth_map.cuda_stream_); } @@ -678,7 +634,7 @@ void GeometryLayer::render(Vulkan* vulkan) { vulkan->draw(primitive.vk_topology_, vertex_count, vertex_offset, - {impl_->vertex_buffer_}, + {impl_->vertex_buffer_.get()}, get_opacity(), primitive.attributes_.color_, primitive.attributes_.point_size_, @@ -694,8 +650,8 @@ void GeometryLayer::render(Vulkan* vulkan) { const ImDrawCmd* pcmd = &impl_->text_draw_list_->CmdBuffer[i]; vulkan->draw_imgui( vk::DescriptorSet(reinterpret_cast(ImGui::GetIO().Fonts->TexID)), - impl_->text_vertex_buffer_, - 
impl_->text_index_buffer_, + impl_->text_vertex_buffer_.get(), + impl_->text_index_buffer_.get(), (sizeof(ImDrawIdx) == 2) ? vk::IndexType::eUint16 : vk::IndexType::eUint32, pcmd->ElemCount, pcmd->IdxOffset, @@ -709,9 +665,9 @@ void GeometryLayer::render(Vulkan* vulkan) { if (!impl_->depth_maps_.empty()) { for (auto&& depth_map : impl_->depth_maps_) { std::vector vertex_buffers; - vertex_buffers.push_back(impl_->depth_map_vertex_buffer_); + vertex_buffers.push_back(impl_->depth_map_vertex_buffer_.get()); if (depth_map.color_device_ptr_) { - vertex_buffers.push_back(impl_->depth_map_color_buffer_); + vertex_buffers.push_back(impl_->depth_map_color_buffer_.get()); } if ((depth_map.render_mode_ == DepthMapRenderMode::LINES) || @@ -720,7 +676,7 @@ void GeometryLayer::render(Vulkan* vulkan) { ? vk::PrimitiveTopology::eLineList : vk::PrimitiveTopology::eTriangleList, vertex_buffers, - impl_->depth_map_index_buffer_, + impl_->depth_map_index_buffer_.get(), vk::IndexType::eUint32, depth_map.index_count_, depth_map.index_offset_, diff --git a/modules/holoviz/src/layers/im_gui_layer.cpp b/modules/holoviz/src/layers/im_gui_layer.cpp index 84381ca..de675be 100644 --- a/modules/holoviz/src/layers/im_gui_layer.cpp +++ b/modules/holoviz/src/layers/im_gui_layer.cpp @@ -23,6 +23,7 @@ #include #include +#include "../vulkan/buffer.hpp" #include "../vulkan/vulkan_app.hpp" namespace holoscan::viz { @@ -30,19 +31,13 @@ namespace holoscan::viz { struct ImGuiLayer::Impl { const ImDrawData* draw_data_ = nullptr; - Vulkan* vulkan_ = nullptr; - Buffer* vertex_buffer_ = nullptr; - Buffer* index_buffer_ = nullptr; + std::unique_ptr vertex_buffer_; + std::unique_ptr index_buffer_; }; ImGuiLayer::ImGuiLayer() : Layer(Type::ImGui), impl_(new ImGuiLayer::Impl) {} -ImGuiLayer::~ImGuiLayer() { - if (impl_->vulkan_) { - if (impl_->vertex_buffer_) { impl_->vulkan_->destroy_buffer(impl_->vertex_buffer_); } - if (impl_->index_buffer_) { impl_->vulkan_->destroy_buffer(impl_->index_buffer_); } - } -} 
+ImGuiLayer::~ImGuiLayer() {} void ImGuiLayer::set_opacity(float opacity) { // call the base class @@ -78,12 +73,8 @@ void ImGuiLayer::end(Vulkan* vulkan) { // create device buffers from vertex and index data - /// @todo need to remember Vulkan instance for destroying buffer, - /// destroy should probably be handled by Vulkan class - impl_->vulkan_ = vulkan; - - if (impl_->vertex_buffer_) { impl_->vulkan_->destroy_buffer(impl_->vertex_buffer_); } - if (impl_->index_buffer_) { impl_->vulkan_->destroy_buffer(impl_->index_buffer_); } + impl_->vertex_buffer_.reset(); + impl_->index_buffer_.reset(); impl_->vertex_buffer_ = vulkan->create_buffer(impl_->draw_data_->TotalVtxCount * sizeof(ImDrawVert), @@ -129,8 +120,8 @@ void ImGuiLayer::render(Vulkan* vulkan) { const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[draw_cmd_index]; vulkan->draw_imgui( vk::DescriptorSet(reinterpret_cast(ImGui::GetIO().Fonts->TexID)), - impl_->vertex_buffer_, - impl_->index_buffer_, + impl_->vertex_buffer_.get(), + impl_->index_buffer_.get(), (sizeof(ImDrawIdx) == 2) ? 
vk::IndexType::eUint16 : vk::IndexType::eUint32, pcmd->ElemCount, pcmd->IdxOffset + index_offset, diff --git a/modules/holoviz/src/layers/image_layer.cpp b/modules/holoviz/src/layers/image_layer.cpp index b11ff73..a2269dc 100644 --- a/modules/holoviz/src/layers/image_layer.cpp +++ b/modules/holoviz/src/layers/image_layer.cpp @@ -17,33 +17,51 @@ #include "image_layer.hpp" +#include +#include #include #include #include #include "../context.hpp" +#include "../vulkan/format_util.hpp" +#include "../vulkan/texture.hpp" #include "../vulkan/vulkan_app.hpp" namespace holoscan::viz { -namespace { +class SourceData { + public: + SourceData() = default; + + /** + * @returns true if the type of source (device/host) is the same (or all sources are null) + */ + bool same_type(const SourceData& other) const { + return (!has_host_memory() && !has_device_memory()) || + ((has_host_memory() == !other.has_device_memory()) && + (has_device_memory() == !other.has_host_memory())); + } -/// @return true if fmt is a depth format -bool is_depth_format(ImageFormat fmt) { - return ((fmt == ImageFormat::D16_UNORM) || (fmt == ImageFormat::X8_D24_UNORM) || - (fmt == ImageFormat::D32_SFLOAT)); -} -} // namespace + bool has_host_memory() const { return (host_ptr_[0] != nullptr); } + + bool has_device_memory() const { return (device_ptr_[0] != 0); } + + std::array device_ptr_{}; + std::array host_ptr_{}; + std::array row_pitch_{}; +}; struct ImageLayer::Impl { bool can_be_reused(Impl& other) const { // we can reuse if the format/component mapping/size and LUT match and // if we did not switch from host to device memory and vice versa if ((format_ == other.format_) && (component_mapping_ == other.component_mapping_) && - (width_ == other.width_) && (height_ == other.height_) && - ((host_ptr_ != nullptr) == (other.device_ptr_ == 0)) && - ((device_ptr_ != 0) == (other.host_ptr_ == nullptr)) && + (ycbcr_model_conversion_ == other.ycbcr_model_conversion_) && + (ycbcr_range_ == other.ycbcr_range_) && 
(x_chroma_location_ == other.x_chroma_location_) && + (y_chroma_location_ == other.y_chroma_location_) && (width_ == other.width_) && + (height_ == other.height_) && source_data_.same_type(other.source_data_) && (depth_format_ == other.depth_format_) && (depth_width_ == other.depth_width_) && (depth_height_ == other.depth_height_) && ((!depth_host_ptr_ && !depth_device_ptr_) || @@ -55,9 +73,7 @@ struct ImageLayer::Impl { // Data will be uploaded when drawing regardless if the layer is reused or not /// @todo this should be made explicit, first check if the layer can be reused and then /// update the reused layer with these properties below which don't prevent reusing - other.host_ptr_ = host_ptr_; - other.device_ptr_ = device_ptr_; - other.row_pitch_ = row_pitch_; + other.source_data_ = source_data_; other.cuda_stream_ = cuda_stream_; other.depth_host_ptr_ = depth_host_ptr_; @@ -73,11 +89,14 @@ struct ImageLayer::Impl { // user provided state ImageFormat format_ = ImageFormat(-1); vk::ComponentMapping component_mapping_; + vk::SamplerYcbcrModelConversion ycbcr_model_conversion_ = + vk::SamplerYcbcrModelConversion::eYcbcr601; + vk::SamplerYcbcrRange ycbcr_range_ = vk::SamplerYcbcrRange::eItuFull; + vk::ChromaLocation x_chroma_location_ = vk::ChromaLocation::eCositedEven; + vk::ChromaLocation y_chroma_location_ = vk::ChromaLocation::eCositedEven; uint32_t width_ = 0; uint32_t height_ = 0; - CUdeviceptr device_ptr_ = 0; - const void* host_ptr_ = nullptr; - size_t row_pitch_ = 0; + SourceData source_data_; CUstream cuda_stream_ = 0; ImageFormat depth_format_ = ImageFormat(-1); @@ -94,24 +113,19 @@ struct ImageLayer::Impl { bool lut_normalized_ = false; // internal state - Vulkan* vulkan_ = nullptr; - Texture* texture_ = nullptr; - Texture* depth_texture_ = nullptr; - Texture* lut_texture_ = nullptr; + std::unique_ptr texture_; + std::unique_ptr depth_texture_; + std::unique_ptr lut_texture_; }; ImageLayer::ImageLayer() : Layer(Type::Image), impl_(new ImageLayer::Impl) 
{} -ImageLayer::~ImageLayer() { - if (impl_->vulkan_) { - if (impl_->texture_) { impl_->vulkan_->destroy_texture(impl_->texture_); } - if (impl_->depth_texture_) { impl_->vulkan_->destroy_texture(impl_->depth_texture_); } - if (impl_->lut_texture_) { impl_->vulkan_->destroy_texture(impl_->lut_texture_); } - } -} +ImageLayer::~ImageLayer() {} void ImageLayer::image_cuda_device(uint32_t width, uint32_t height, ImageFormat fmt, - CUdeviceptr device_ptr, size_t row_pitch) { + CUdeviceptr device_ptr_plane_0, size_t row_pitch_plane_0, + CUdeviceptr device_ptr_plane_1, size_t row_pitch_plane_1, + CUdeviceptr device_ptr_plane_2, size_t row_pitch_plane_2) { // If a depth format is specified, use this image to write depth for the color image. if (is_depth_format(fmt)) { if (impl_->depth_host_ptr_) { @@ -121,30 +135,36 @@ void ImageLayer::image_cuda_device(uint32_t width, uint32_t height, ImageFormat impl_->depth_width_ = width; impl_->depth_height_ = height; impl_->depth_format_ = ImageFormat::R32_SFLOAT; // use a color format for sampling - impl_->depth_device_ptr_ = device_ptr; + impl_->depth_device_ptr_ = device_ptr_plane_0; impl_->depth_cuda_stream_ = Context::get().get_cuda_stream(); - impl_->row_pitch_ = row_pitch; + impl_->depth_row_pitch_ = row_pitch_plane_0; return; } - if (impl_->host_ptr_) { + if (impl_->source_data_.has_host_memory()) { throw std::runtime_error("Can't simultaneously specify device and host image for a layer."); } impl_->width_ = width; impl_->height_ = height; impl_->format_ = fmt; - impl_->device_ptr_ = device_ptr; + impl_->source_data_.device_ptr_[0] = device_ptr_plane_0; + impl_->source_data_.row_pitch_[0] = row_pitch_plane_0; + impl_->source_data_.device_ptr_[1] = device_ptr_plane_1; + impl_->source_data_.row_pitch_[1] = row_pitch_plane_1; + impl_->source_data_.device_ptr_[2] = device_ptr_plane_2; + impl_->source_data_.row_pitch_[2] = row_pitch_plane_2; impl_->cuda_stream_ = Context::get().get_cuda_stream(); - impl_->row_pitch_ = row_pitch; } 
void ImageLayer::image_cuda_array(ImageFormat fmt, CUarray array) { throw std::runtime_error("Not implemented"); } -void ImageLayer::image_host(uint32_t width, uint32_t height, ImageFormat fmt, const void* data, - size_t row_pitch) { +void ImageLayer::image_host(uint32_t width, uint32_t height, ImageFormat fmt, + const void* data_plane_0, size_t row_pitch_plane_0, + const void* data_plane_1, size_t row_pitch_plane_1, + const void* data_plane_2, size_t row_pitch_plane_2) { // If a depth format is specified, use this image to write depth for the color image. if (is_depth_format(fmt)) { if (impl_->depth_device_ptr_) { @@ -154,21 +174,25 @@ void ImageLayer::image_host(uint32_t width, uint32_t height, ImageFormat fmt, co impl_->depth_width_ = width; impl_->depth_height_ = height; impl_->depth_format_ = ImageFormat::R32_SFLOAT; // use a color format for sampling - impl_->depth_host_ptr_ = data; + impl_->depth_host_ptr_ = data_plane_0; impl_->depth_cuda_stream_ = Context::get().get_cuda_stream(); - impl_->depth_row_pitch_ = row_pitch; + impl_->depth_row_pitch_ = row_pitch_plane_0; return; } - if (impl_->device_ptr_) { + if (impl_->source_data_.has_device_memory()) { throw std::runtime_error("Can't simultaneously specify device and host image for a layer."); } impl_->width_ = width; impl_->height_ = height; impl_->format_ = fmt; - impl_->host_ptr_ = data; - impl_->row_pitch_ = row_pitch; + impl_->source_data_.host_ptr_[0] = data_plane_0; + impl_->source_data_.row_pitch_[0] = row_pitch_plane_0; + impl_->source_data_.host_ptr_[1] = data_plane_1; + impl_->source_data_.row_pitch_[1] = row_pitch_plane_1; + impl_->source_data_.host_ptr_[2] = data_plane_2; + impl_->source_data_.row_pitch_[2] = row_pitch_plane_2; } void ImageLayer::lut(uint32_t size, ImageFormat fmt, size_t data_size, const void* data, @@ -208,17 +232,67 @@ void ImageLayer::image_component_mapping(ComponentSwizzle r, ComponentSwizzle g, impl_->component_mapping_.a = to_vk_swizzle(a); } +void 
ImageLayer::image_yuv_model_conversion(YuvModelConversion yuv_model_conversion) { + switch (yuv_model_conversion) { + case YuvModelConversion::YUV_601: + impl_->ycbcr_model_conversion_ = vk::SamplerYcbcrModelConversion::eYcbcr601; + break; + case YuvModelConversion::YUV_709: + impl_->ycbcr_model_conversion_ = vk::SamplerYcbcrModelConversion::eYcbcr709; + break; + case YuvModelConversion::YUV_2020: + impl_->ycbcr_model_conversion_ = vk::SamplerYcbcrModelConversion::eYcbcr2020; + break; + default: + throw std::runtime_error("Unhandled yuv model conversion."); + } +} + +void ImageLayer::image_yuv_range(YuvRange yuv_range) { + switch (yuv_range) { + case YuvRange::ITU_FULL: + impl_->ycbcr_range_ = vk::SamplerYcbcrRange::eItuFull; + break; + case YuvRange::ITU_NARROW: + impl_->ycbcr_range_ = vk::SamplerYcbcrRange::eItuNarrow; + break; + default: + throw std::runtime_error("Unhandled yuv range."); + } +} + +void ImageLayer::image_chroma_location(ChromaLocation x_chroma_location, + ChromaLocation y_chroma_location) { + switch (x_chroma_location) { + case ChromaLocation::COSITED_EVEN: + impl_->x_chroma_location_ = vk::ChromaLocation::eCositedEven; + break; + case ChromaLocation::MIDPOINT: + impl_->x_chroma_location_ = vk::ChromaLocation::eMidpoint; + break; + default: + throw std::runtime_error("Unhandled chroma location."); + } + + switch (y_chroma_location) { + case ChromaLocation::COSITED_EVEN: + impl_->y_chroma_location_ = vk::ChromaLocation::eCositedEven; + break; + case ChromaLocation::MIDPOINT: + impl_->y_chroma_location_ = vk::ChromaLocation::eMidpoint; + break; + default: + throw std::runtime_error("Unhandled chroma location."); + } +} + bool ImageLayer::can_be_reused(Layer& other) const { return Layer::can_be_reused(other) && impl_->can_be_reused(*static_cast(other).impl_.get()); } void ImageLayer::end(Vulkan* vulkan) { - /// @todo need to remember Vulkan instance for destroying texture, - /// destroy should probably be handled by Vulkan class - impl_->vulkan_ = 
vulkan; - - if (impl_->device_ptr_) { + if (impl_->source_data_.has_device_memory() || impl_->source_data_.has_host_memory()) { // check if this is a reused layer, in this case // we just have to upload the data to the texture if (!impl_->texture_) { @@ -226,80 +300,70 @@ void ImageLayer::end(Vulkan* vulkan) { // be nearest sampled since it has index values const bool has_lut = !impl_->lut_data_.empty(); + Vulkan::CreateTextureArgs args; + args.width_ = impl_->width_; + args.height_ = impl_->height_; + args.format_ = impl_->format_; + args.component_mapping_ = impl_->component_mapping_; + args.filter_ = has_lut ? vk::Filter::eNearest : vk::Filter::eLinear; + args.cuda_interop_ = impl_->source_data_.has_device_memory(); + args.ycbcr_model_conversion_ = impl_->ycbcr_model_conversion_; + args.x_chroma_location_ = impl_->x_chroma_location_; + args.y_chroma_location_ = impl_->y_chroma_location_; + args.ycbcr_range_ = impl_->ycbcr_range_; + // create a texture to which we can upload from CUDA - impl_->texture_ = vulkan->create_texture_for_cuda_interop( - impl_->width_, - impl_->height_, - impl_->format_, - impl_->component_mapping_, - has_lut ? vk::Filter::eNearest : vk::Filter::eLinear); + impl_->texture_ = vulkan->create_texture(args); } - vulkan->upload_to_texture( - impl_->device_ptr_, impl_->row_pitch_, impl_->texture_, impl_->cuda_stream_); - } else if (impl_->host_ptr_) { - // check if this is a reused layer, - // in this case we just have to upload the data to the texture - if (!impl_->texture_) { - // check if we have a lut, if yes, the texture needs to be - // nearest sampled since it has index values - const bool has_lut = !impl_->lut_data_.empty(); - // create a texture to which we can upload from host memory - impl_->texture_ = - vulkan->create_texture(impl_->width_, - impl_->height_, - impl_->format_, - 0, - nullptr, - impl_->component_mapping_, - has_lut ? 
vk::Filter::eNearest : vk::Filter::eLinear); + if (impl_->source_data_.has_device_memory()) { + impl_->texture_->upload( + impl_->cuda_stream_, impl_->source_data_.device_ptr_, impl_->source_data_.row_pitch_); + } else { + assert(impl_->source_data_.has_host_memory()); + vulkan->upload_to_texture( + impl_->texture_.get(), impl_->source_data_.host_ptr_, impl_->source_data_.row_pitch_); } - vulkan->upload_to_texture(impl_->host_ptr_, impl_->row_pitch_, impl_->texture_); } - if (impl_->depth_device_ptr_) { + if (impl_->depth_device_ptr_ || impl_->depth_host_ptr_) { // check if this is a reused layer, in this case // we just have to upload the data to the texture if (!impl_->depth_texture_) { // create a texture to which we can upload from CUDA - impl_->depth_texture_ = vulkan->create_texture_for_cuda_interop(impl_->depth_width_, - impl_->depth_height_, - impl_->depth_format_, - vk::ComponentMapping(), - vk::Filter::eLinear); + Vulkan::CreateTextureArgs args; + args.width_ = impl_->depth_width_; + args.height_ = impl_->depth_height_; + args.format_ = impl_->depth_format_; + args.filter_ = vk::Filter::eLinear; + args.cuda_interop_ = impl_->depth_device_ptr_ != 0; + + impl_->depth_texture_ = vulkan->create_texture(args); } - vulkan->upload_to_texture(impl_->depth_device_ptr_, - impl_->depth_row_pitch_, - impl_->depth_texture_, - impl_->depth_cuda_stream_); - } else if (impl_->depth_host_ptr_) { - // check if this is a reused layer, - // in this case we just have to upload the data to the texture - if (!impl_->depth_texture_) { - // create a texture to which we can upload from host memory - impl_->depth_texture_ = vulkan->create_texture(impl_->depth_width_, - impl_->depth_height_, - impl_->depth_format_, - 0, - nullptr, - vk::ComponentMapping(), - vk::Filter::eLinear); + + if (impl_->depth_device_ptr_) { + impl_->depth_texture_->upload( + impl_->depth_cuda_stream_, {impl_->depth_device_ptr_}, {impl_->depth_row_pitch_}); + } else { + assert(impl_->depth_host_ptr_); + 
vulkan->upload_to_texture( + impl_->depth_texture_.get(), {impl_->depth_host_ptr_}, {impl_->depth_row_pitch_}); } - vulkan->upload_to_texture( - impl_->depth_host_ptr_, impl_->depth_row_pitch_, impl_->depth_texture_); } if (!impl_->lut_data_.empty() && !impl_->lut_texture_) { // create LUT texture - impl_->lut_texture_ = - vulkan->create_texture(impl_->lut_size_, - 1, - impl_->lut_format_, - impl_->lut_data_.size(), - impl_->lut_data_.data(), - vk::ComponentMapping(), - impl_->lut_normalized_ ? vk::Filter::eLinear : vk::Filter::eNearest, - impl_->lut_normalized_); + Vulkan::CreateTextureArgs args; + args.width_ = impl_->lut_size_; + args.height_ = 1; + args.format_ = impl_->lut_format_; + args.filter_ = impl_->lut_normalized_ ? vk::Filter::eLinear : vk::Filter::eNearest; + args.normalized_ = impl_->lut_normalized_; + + impl_->lut_texture_ = vulkan->create_texture(args); + + vulkan->upload_to_texture( + impl_->lut_texture_.get(), {impl_->lut_data_.data()}, {impl_->lut_data_.size()}); } } @@ -317,8 +381,11 @@ void ImageLayer::render(Vulkan* vulkan) { view_matrix = nvmath::mat4f(1); } // draw - vulkan->draw_texture( - impl_->texture_, impl_->depth_texture_, impl_->lut_texture_, get_opacity(), view_matrix); + vulkan->draw_texture(impl_->texture_.get(), + impl_->depth_texture_.get(), + impl_->lut_texture_.get(), + get_opacity(), + view_matrix); } } } diff --git a/modules/holoviz/src/layers/image_layer.hpp b/modules/holoviz/src/layers/image_layer.hpp index 3362dae..81c5d61 100644 --- a/modules/holoviz/src/layers/image_layer.hpp +++ b/modules/holoviz/src/layers/image_layer.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
 * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -45,7 +45,7 @@ class ImageLayer : public Layer { ~ImageLayer(); /** - * Defines the image data for this layer, source is Cuda device memory. + * Defines the image data for this layer, source is CUDA device memory. * * If the image has a alpha value it's multiplied with the layer opacity. * @@ -54,18 +54,29 @@ * the same layer. This enables depth-compositing image layers with other Holoviz layers. * Supported depth formats are: D16_UNORM, X8_D24_UNORM, D32_SFLOAT. * + * Supports multi-planar images (e.g. YUV), `device_ptr` and `row_pitch` specify the parameters + * for the first plane (plane 0), `device_ptr_n` and `row_pitch_n` for subsequent planes. + * * @param width width of the image * @param height height of the image * @param fmt image format - * @param device_ptr Cuda device memory pointer - * @param row_pitch the number of bytes between each row, if zero then data is assumed to be - * contiguous in memory + * @param device_ptr CUDA device memory pointer + * @param row_pitch the number of bytes between each row, if zero then data is + * assumed to be contiguous in memory + * @param device_ptr_plane_1 CUDA device memory pointer for plane 1 + * @param row_pitch_plane_1 the number of bytes between each row for plane 1, if zero then data is + * assumed to be contiguous in memory + * @param device_ptr_plane_2 CUDA device memory pointer for plane 2 + * @param row_pitch_plane_2 the number of bytes between each row for plane 2, if zero then data is + * assumed to be contiguous in memory */ void image_cuda_device(uint32_t width, uint32_t height, ImageFormat fmt, CUdeviceptr device_ptr, - size_t row_pitch = 0); + size_t row_pitch = 0, CUdeviceptr device_ptr_plane_1 = 0, + size_t row_pitch_plane_1 = 0, CUdeviceptr device_ptr_plane_2 = 0, + size_t row_pitch_plane_2 = 0); /** - * Defines the image data for this layer, source is a Cuda array.
+ * Defines the image data for this layer, source is a CUDA array. * * If the image has a alpha value it's multiplied with the layer opacity. * @@ -75,7 +86,7 @@ class ImageLayer : public Layer { * Supported depth formats are: D16_UNORM, X8_D24_UNORM, D32_SFLOAT. * * @param fmt image format - * @param array Cuda array + * @param array CUDA array */ void image_cuda_array(ImageFormat fmt, CUarray array); @@ -89,15 +100,26 @@ class ImageLayer : public Layer { * the same layer. This enables depth-compositing image layers with other Holoviz layers. * Supported depth formats are: D16_UNORM, X8_D24_UNORM, D32_SFLOAT. * + * Supports multi-planar images (e.g. YUV), `device_ptr` and `row_pitch` specify the parameters + * for the first plane (plane 0), `device_ptr_n` and `row_pitch_n` for subsequent planes. + * * @param width width of the image * @param height height of the image * @param fmt image format * @param data host memory pointer * @param row_pitch the number of bytes between each row, if zero then data is assumed to be * contiguous in memory + * @param data_plane_1 host memory pointer for plane 1 + * @param row_pitch_plane_1 the number of bytes between each row for plane 1, if zero then data is + * assumed to be contiguous in memory + * @param data_plane_2 host memory pointer for plane 2 + * @param row_pitch_plane_2 the number of bytes between each row for plane 2, if zero then data is + * assumed to be contiguous in memory */ void image_host(uint32_t width, uint32_t height, ImageFormat fmt, const void* data, - size_t row_pitch = 0); + size_t row_pitch = 0, const void* data_plane_1 = nullptr, + size_t row_pitch_plane_1 = 0, const void* data_plane_2 = nullptr, + size_t row_pitch_plane_2 = 0); /** * Defines the lookup table for this image layer. @@ -154,6 +176,30 @@ class ImageLayer : public Layer { void image_component_mapping(ComponentSwizzle r, ComponentSwizzle g, ComponentSwizzle b, ComponentSwizzle a); + /** + * Specifies the YUV model conversion. 
+ * + * @param yuv_model_conversion YUV model conversion. Default is `YUV_601`. + */ + void image_yuv_model_conversion(YuvModelConversion yuv_model_conversion); + + /** + * Specifies the YUV range. + * + * @param yuv_range YUV range. Default is `ITU_FULL`. + */ + void image_yuv_range(YuvRange yuv_range); + + /** + * Defines the location of downsampled chroma component samples relative to the luma samples. + * + * @param x_chroma_location chroma location in x direction for formats which are chroma + * downsampled in width (420 and 422). Default is `COSITED_EVEN`. + * @param y_chroma_location chroma location in y direction for formats which are chroma + * downsampled in height (420). Default is `COSITED_EVEN`. + */ + void image_chroma_location(ChromaLocation x_chroma_location, ChromaLocation y_chroma_location); + /// holoscan::viz::Layer virtual members ///@{ bool can_be_reused(Layer& other) const override; diff --git a/modules/holoviz/src/vulkan/buffer.cpp b/modules/holoviz/src/vulkan/buffer.cpp index 452e35e..1d700e8 100644 --- a/modules/holoviz/src/vulkan/buffer.cpp +++ b/modules/holoviz/src/vulkan/buffer.cpp @@ -19,17 +19,19 @@ #include +#include "vulkan_app.hpp" + namespace holoscan::viz { -Buffer::Buffer(vk::Device device, nvvk::ResourceAllocator* alloc, size_t size) - : Resource(device, alloc), size_(size) {} +Buffer::Buffer(Vulkan* vulkan, nvvk::ResourceAllocator* alloc, size_t size) + : Resource(vulkan, alloc), size_(size) {} Buffer::~Buffer() { - destroy(); + wait(); // check if this buffer had been imported to CUDA if (device_ptr_) { - const CudaService::ScopedPush cuda_context = cuda_service_->PushContext(); + const CudaService::ScopedPush cuda_context = vulkan_->get_cuda_service()->PushContext(); device_ptr_.reset(); } alloc_->destroy(buffer_); @@ -48,7 +50,7 @@ void Buffer::import_to_cuda(const std::unique_ptr& cuda_service) { buffer_desc.size = size_; buffer_desc.offset = mem_info.offset; - device_ptr_.reset([external_mem = external_mem_.get(), 
&buffer_desc] { + device_ptr_.reset([external_mem = external_mems_.front().get(), &buffer_desc] { CUdeviceptr device_ptr; CudaCheck(cuExternalMemoryGetMappedBuffer(&device_ptr, external_mem, &buffer_desc)); return device_ptr; diff --git a/modules/holoviz/src/vulkan/buffer.hpp b/modules/holoviz/src/vulkan/buffer.hpp index d5ee858..00d1acf 100644 --- a/modules/holoviz/src/vulkan/buffer.hpp +++ b/modules/holoviz/src/vulkan/buffer.hpp @@ -28,7 +28,7 @@ namespace holoscan::viz { class Buffer : public Resource { public: - explicit Buffer(vk::Device device, nvvk::ResourceAllocator* alloc, size_t size); + explicit Buffer(Vulkan *vulkan, nvvk::ResourceAllocator* alloc, size_t size); Buffer() = delete; virtual ~Buffer(); diff --git a/modules/holoviz/src/vulkan/format_util.cpp b/modules/holoviz/src/vulkan/format_util.cpp index b7e3912..3d157ec 100644 --- a/modules/holoviz/src/vulkan/format_util.cpp +++ b/modules/holoviz/src/vulkan/format_util.cpp @@ -19,15 +19,18 @@ namespace holoscan::viz { -void format_info(ImageFormat format, uint32_t* src_channels, uint32_t* dst_channels, - uint32_t* component_size) { +void format_info(ImageFormat format, uint32_t* channels, uint32_t* hw_channels, + uint32_t* component_size, uint32_t* width_divisor, uint32_t* height_divisor, + uint32_t plane) { + if (width_divisor) { *width_divisor = 1; } + if (height_divisor) { *height_divisor = 1; } switch (format) { case ImageFormat::R8_UINT: case ImageFormat::R8_SINT: case ImageFormat::R8_UNORM: case ImageFormat::R8_SNORM: case ImageFormat::R8_SRGB: - *src_channels = *dst_channels = 1u; + *channels = *hw_channels = 1u; *component_size = sizeof(uint8_t); break; case ImageFormat::R16_UINT: @@ -35,7 +38,7 @@ void format_info(ImageFormat format, uint32_t* src_channels, uint32_t* dst_chann case ImageFormat::R16_UNORM: case ImageFormat::R16_SNORM: case ImageFormat::R16_SFLOAT: - *src_channels = *dst_channels = 1u; + *channels = *hw_channels = 1u; *component_size = sizeof(uint16_t); break; case 
ImageFormat::R32_UINT: @@ -43,18 +46,18 @@ void format_info(ImageFormat format, uint32_t* src_channels, uint32_t* dst_chann // packed formats are treated as single component formats case ImageFormat::A2B10G10R10_UNORM_PACK32: case ImageFormat::A2R10G10B10_UNORM_PACK32: - *src_channels = *dst_channels = 1u; + *channels = *hw_channels = 1u; *component_size = sizeof(uint32_t); break; case ImageFormat::R32_SFLOAT: - *src_channels = *dst_channels = 1u; + *channels = *hw_channels = 1u; *component_size = sizeof(float); break; case ImageFormat::R8G8B8_UNORM: case ImageFormat::R8G8B8_SNORM: case ImageFormat::R8G8B8_SRGB: - *src_channels = 3u; - *dst_channels = 4u; + *channels = 3u; + *hw_channels = 4u; *component_size = sizeof(uint8_t); break; case ImageFormat::R8G8B8A8_UNORM: @@ -64,31 +67,124 @@ void format_info(ImageFormat format, uint32_t* src_channels, uint32_t* dst_chann case ImageFormat::B8G8R8A8_SRGB: case ImageFormat::A8B8G8R8_UNORM_PACK32: case ImageFormat::A8B8G8R8_SRGB_PACK32: - *src_channels = *dst_channels = 4u; + *channels = *hw_channels = 4u; *component_size = sizeof(uint8_t); break; case ImageFormat::R16G16B16A16_UNORM: case ImageFormat::R16G16B16A16_SNORM: case ImageFormat::R16G16B16A16_SFLOAT: - *src_channels = *dst_channels = 4u; + *channels = *hw_channels = 4u; *component_size = sizeof(uint16_t); break; case ImageFormat::R32G32B32A32_SFLOAT: - *src_channels = *dst_channels = 4u; + *channels = *hw_channels = 4u; *component_size = sizeof(float); break; case ImageFormat::D16_UNORM: - *src_channels = *dst_channels = 1u; + *channels = *hw_channels = 1u; *component_size = sizeof(uint16_t); break; case ImageFormat::X8_D24_UNORM: - *src_channels = *dst_channels = 1u; + *channels = *hw_channels = 1u; *component_size = sizeof(uint32_t); break; case ImageFormat::D32_SFLOAT: - *src_channels = *dst_channels = 1u; + *channels = *hw_channels = 1u; *component_size = sizeof(uint32_t); break; + case ImageFormat::Y8U8Y8V8_422_UNORM: + case ImageFormat::U8Y8V8Y8_422_UNORM: 
+ *channels = *hw_channels = 2u; + *component_size = sizeof(uint8_t); + break; + case ImageFormat::Y8_U8V8_2PLANE_420_UNORM: + if (plane == 0) { + *channels = *hw_channels = 1u; + } else if (plane == 1) { + *channels = *hw_channels = 2u; + if (width_divisor) { *width_divisor = 2; } + if (height_divisor) { *height_divisor = 2; } + } else { + throw std::invalid_argument("Unhandled plane index"); + } + *component_size = sizeof(uint8_t); + break; + case ImageFormat::Y8_U8V8_2PLANE_422_UNORM: + if (plane == 0) { + *channels = *hw_channels = 1u; + } else if (plane == 1) { + *channels = *hw_channels = 2u; + if (width_divisor) { *width_divisor = 2; } + } else { + throw std::invalid_argument("Unhandled plane index"); + } + *component_size = sizeof(uint8_t); + break; + case ImageFormat::Y8_U8_V8_3PLANE_420_UNORM: + *channels = *hw_channels = 1u; + *component_size = sizeof(uint8_t); + if (plane == 0) { + } else if ((plane == 1) || (plane == 2)) { + if (width_divisor) { *width_divisor = 2; } + if (height_divisor) { *height_divisor = 2; } + } else { + throw std::invalid_argument("Unhandled plane index"); + } + break; + case ImageFormat::Y8_U8_V8_3PLANE_422_UNORM: + *channels = *hw_channels = 1u; + *component_size = sizeof(uint8_t); + if (plane == 0) { + } else if ((plane == 1) || (plane == 2)) { + if (width_divisor) { *width_divisor = 2; } + } else { + throw std::invalid_argument("Unhandled plane index"); + } + break; + case ImageFormat::Y16_U16V16_2PLANE_420_UNORM: + if (plane == 0) { + *channels = *hw_channels = 1u; + } else if (plane == 1) { + *channels = *hw_channels = 2u; + if (width_divisor) { *width_divisor = 2; } + if (height_divisor) { *height_divisor = 2; } + } else { + throw std::invalid_argument("Unhandled plane index"); + } + *component_size = sizeof(uint16_t); + break; + case ImageFormat::Y16_U16V16_2PLANE_422_UNORM: + if (plane == 0) { + *channels = *hw_channels = 1u; + } else if (plane == 1) { + *channels = *hw_channels = 2u; + if (width_divisor) { 
*width_divisor = 2; } + } else { + throw std::invalid_argument("Unhandled plane index"); + } + *component_size = sizeof(uint16_t); + break; + case ImageFormat::Y16_U16_V16_3PLANE_420_UNORM: + *channels = *hw_channels = 1u; + *component_size = sizeof(uint16_t); + if (plane == 0) { + } else if ((plane == 1) || (plane == 2)) { + if (width_divisor) { *width_divisor = 2; } + if (height_divisor) { *height_divisor = 2; } + } else { + throw std::invalid_argument("Unhandled plane index"); + } + break; + case ImageFormat::Y16_U16_V16_3PLANE_422_UNORM: + *channels = *hw_channels = 1u; + *component_size = sizeof(uint16_t); + if (plane == 0) { + } else if ((plane == 1) || (plane == 2)) { + if (width_divisor) { *width_divisor = 2; } + } else { + throw std::invalid_argument("Unhandled plane index"); + } + break; default: throw std::runtime_error("Unhandled image format."); } @@ -197,6 +293,36 @@ vk::Format to_vulkan_format(ImageFormat format) { case ImageFormat::A8B8G8R8_SRGB_PACK32: vk_format = vk::Format::eA8B8G8R8SrgbPack32; break; + case ImageFormat::Y8U8Y8V8_422_UNORM: + vk_format = vk::Format::eG8B8G8R8422Unorm; + break; + case ImageFormat::U8Y8V8Y8_422_UNORM: + vk_format = vk::Format::eB8G8R8G8422Unorm; + break; + case ImageFormat::Y8_U8V8_2PLANE_420_UNORM: + vk_format = vk::Format::eG8B8R82Plane420Unorm; + break; + case ImageFormat::Y8_U8V8_2PLANE_422_UNORM: + vk_format = vk::Format::eG8B8R82Plane422Unorm; + break; + case ImageFormat::Y8_U8_V8_3PLANE_420_UNORM: + vk_format = vk::Format::eG8B8R83Plane420Unorm; + break; + case ImageFormat::Y8_U8_V8_3PLANE_422_UNORM: + vk_format = vk::Format::eG8B8R83Plane422Unorm; + break; + case ImageFormat::Y16_U16V16_2PLANE_420_UNORM: + vk_format = vk::Format::eG16B16R162Plane420Unorm; + break; + case ImageFormat::Y16_U16V16_2PLANE_422_UNORM: + vk_format = vk::Format::eG16B16R162Plane422Unorm; + break; + case ImageFormat::Y16_U16_V16_3PLANE_420_UNORM: + vk_format = vk::Format::eG16B16R163Plane420Unorm; + break; + case 
ImageFormat::Y16_U16_V16_3PLANE_422_UNORM: + vk_format = vk::Format::eG16B16R163Plane422Unorm; + break; default: throw std::runtime_error("Unhandled image format."); } @@ -294,7 +420,8 @@ std::optional to_image_format(vk::Format vk_format) { break; case vk::Format::eA8B8G8R8SrgbPack32: image_format = ImageFormat::A8B8G8R8_SRGB_PACK32; - break; default: + break; + default: break; } @@ -326,4 +453,43 @@ vk::ColorSpaceKHR to_vulkan_color_space(ColorSpace color_space) { return vk_color_space; } +bool is_depth_format(ImageFormat fmt) { + return ((fmt == ImageFormat::D16_UNORM) || (fmt == ImageFormat::X8_D24_UNORM) || + (fmt == ImageFormat::D32_SFLOAT)); +} + +bool is_yuv_format(ImageFormat fmt) { + switch (fmt) { + case ImageFormat::Y8U8Y8V8_422_UNORM: + case ImageFormat::U8Y8V8Y8_422_UNORM: + case ImageFormat::Y8_U8V8_2PLANE_420_UNORM: + case ImageFormat::Y8_U8V8_2PLANE_422_UNORM: + case ImageFormat::Y8_U8_V8_3PLANE_420_UNORM: + case ImageFormat::Y8_U8_V8_3PLANE_422_UNORM: + case ImageFormat::Y16_U16V16_2PLANE_420_UNORM: + case ImageFormat::Y16_U16V16_2PLANE_422_UNORM: + case ImageFormat::Y16_U16_V16_3PLANE_420_UNORM: + case ImageFormat::Y16_U16_V16_3PLANE_422_UNORM: + return true; + default: + return false; + } +} + +bool is_multi_planar_format(ImageFormat fmt) { + switch (fmt) { + case ImageFormat::Y8_U8V8_2PLANE_420_UNORM: + case ImageFormat::Y8_U8V8_2PLANE_422_UNORM: + case ImageFormat::Y8_U8_V8_3PLANE_420_UNORM: + case ImageFormat::Y8_U8_V8_3PLANE_422_UNORM: + case ImageFormat::Y16_U16V16_2PLANE_420_UNORM: + case ImageFormat::Y16_U16V16_2PLANE_422_UNORM: + case ImageFormat::Y16_U16_V16_3PLANE_420_UNORM: + case ImageFormat::Y16_U16_V16_3PLANE_422_UNORM: + return true; + default: + return false; + } +} + } // namespace holoscan::viz diff --git a/modules/holoviz/src/vulkan/format_util.hpp b/modules/holoviz/src/vulkan/format_util.hpp index 40a1de4..853b72e 100644 --- a/modules/holoviz/src/vulkan/format_util.hpp +++ b/modules/holoviz/src/vulkan/format_util.hpp @@ 
-27,13 +27,55 @@ namespace holoscan::viz { -void format_info(ImageFormat format, uint32_t* src_channels, uint32_t* dst_channels, - uint32_t* component_size); +/** + * Get information on a format + * + * @param format format to get information from + * @param channels format channels + * @param hw_channels channels when used by Vulkan (different from `channels` for RGB8 formats) + * @param component_size size in bytes of one component + * @param width_divisor width divisor for multi-planar formats + * @param height_divisor height divisor for multi-planar formats + * @param plane image plane for multi-planar formats + */ +void format_info(ImageFormat format, uint32_t* channels, uint32_t* hw_channels, + uint32_t* component_size, uint32_t* width_divisor = nullptr, + uint32_t* height_divisor = nullptr, uint32_t plane = 0); + +/** + * Convert a ImageFormat enum to a Vulkan format enum + * + * @param format ImageFormat enum + * @return vk::Format Vulkan format enum + */ vk::Format to_vulkan_format(ImageFormat format); + +/** + * Convert a Vulkan format enum to a ImageFormat enum. If there is no matching ImageFormat then + * the return value will not be valid. 
+ * + * @param vk_format Vulkan format enum + * @return std::optional ImageFormat enum + */ std::optional to_image_format(vk::Format vk_format); +/** + * Convert a ColorSpace enum to a Vulkan color space enum + * + * @param color_space ColorSpace enum + * @return vk::ColorSpaceKHR Vulkan color space enum + */ vk::ColorSpaceKHR to_vulkan_color_space(ColorSpace color_space); +/// @return true if fmt is a depth format +bool is_depth_format(ImageFormat fmt); + +/// @return true if fmt is a YCbCr format +bool is_yuv_format(ImageFormat fmt); + +/// @return true if fmt is multi-planar +bool is_multi_planar_format(ImageFormat fmt); + } // namespace holoscan::viz #endif /* MODULES_HOLOVIZ_SRC_VULKAN_FORMAT_UTIL_HPP */ diff --git a/modules/holoviz/src/vulkan/resource.cpp b/modules/holoviz/src/vulkan/resource.cpp index 865de01..2ab03ff 100644 --- a/modules/holoviz/src/vulkan/resource.cpp +++ b/modules/holoviz/src/vulkan/resource.cpp @@ -20,20 +20,36 @@ #include #include +#include #include +#include "vulkan_app.hpp" + namespace holoscan::viz { -Resource::Resource(vk::Device device, nvvk::ResourceAllocator* alloc) - : device_(device), alloc_(alloc) {} +Resource::Resource(Vulkan* vulkan, nvvk::ResourceAllocator* alloc) + : vulkan_(vulkan), alloc_(alloc) {} Resource::~Resource() { - destroy(); + // `wait()` needs to be called before destroying resource + assert(!fence_); + + // check if this resource had been imported to CUDA + if (!external_mems_.empty()) { + const CudaService::ScopedPush cuda_context = vulkan_->get_cuda_service()->PushContext(); + + external_mems_.clear(); + cuda_access_signal_semaphore_.reset(); + vulkan_access_wait_semaphore_.reset(); + + cuda_access_wait_semaphore_.reset(); + vulkan_access_signal_semaphore_.reset(); + } } void Resource::access_with_vulkan(nvvk::BatchSubmission& batch_submission) { - if (external_mem_) { + if (!external_mems_.empty()) { if (state_ == AccessState::CUDA) { // enqueue the semaphore signalled by CUDA to be waited on by rendering
batch_submission.enqueueWait(cuda_access_wait_semaphore_.get(), @@ -64,64 +80,44 @@ void Resource::end_access_with_cuda(CUstream stream) { state_ = AccessState::CUDA; } -void Resource::destroy() { - if (fence_) { - // if the resource had been tagged with a fence, wait for it before freeing the memory - const vk::Result result = device_.waitForFences(fence_, true, 100'000'000); - if (result != vk::Result::eSuccess) { - HOLOSCAN_LOG_WARN("Waiting for texture fence failed with {}", vk::to_string(result)); - } - fence_ = nullptr; - } - - // check if this resource had been imported to CUDA - if (external_mem_) { - const CudaService::ScopedPush cuda_context = cuda_service_->PushContext(); - - external_mem_.reset(); - cuda_access_signal_semaphore_.reset(); - vulkan_access_wait_semaphore_.reset(); - - cuda_access_wait_semaphore_.reset(); - vulkan_access_signal_semaphore_.reset(); - } -} - void Resource::import_to_cuda(const std::unique_ptr& cuda_service, - const nvvk::MemAllocator::MemInfo& mem_info) { - cuda_service_ = cuda_service.get(); - + const nvvk::MemAllocator::MemInfo& mem_info) { vk::MemoryGetFdInfoKHR memory_get_fd_info; memory_get_fd_info.memory = mem_info.memory; memory_get_fd_info.handleType = vk::ExternalMemoryHandleTypeFlagBits::eOpaqueFd; UniqueValue file_handle; - file_handle.reset(device_.getMemoryFdKHR(memory_get_fd_info)); + file_handle.reset(vulkan_->get_device().getMemoryFdKHR(memory_get_fd_info)); CUDA_EXTERNAL_MEMORY_HANDLE_DESC memory_handle_desc{}; memory_handle_desc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD; memory_handle_desc.handle.fd = file_handle.get(); memory_handle_desc.size = mem_info.offset + mem_info.size; - external_mem_.reset([&memory_handle_desc] { + UniqueCUexternalMemory external_mem([&memory_handle_desc] { CUexternalMemory external_mem; CudaCheck(cuImportExternalMemory(&external_mem, &memory_handle_desc)); return external_mem; }()); + external_mems_.push_back(std::move(external_mem)); // don't need to close the file handle 
if it had been successfully imported file_handle.release(); - // create the semaphores, one for waiting after CUDA access and one for signalling - // Vulkan access - vk::StructureChain chain; - vk::SemaphoreCreateInfo& semaphore_create_info = chain.get(); - chain.get().handleTypes = - vk::ExternalSemaphoreHandleTypeFlagBits::eOpaqueFd; - cuda_access_wait_semaphore_ = device_.createSemaphoreUnique(semaphore_create_info); - vulkan_access_signal_semaphore_ = device_.createSemaphoreUnique(semaphore_create_info); - - // import the semaphore to CUDA - cuda_access_signal_semaphore_ = import_semaphore_to_cuda(cuda_access_wait_semaphore_.get()); - vulkan_access_wait_semaphore_ = import_semaphore_to_cuda(vulkan_access_signal_semaphore_.get()); + if (!cuda_access_wait_semaphore_) { + // create the semaphores, one for waiting after CUDA access and one for signalling + // Vulkan access + vk::StructureChain chain; + vk::SemaphoreCreateInfo& semaphore_create_info = chain.get(); + chain.get().handleTypes = + vk::ExternalSemaphoreHandleTypeFlagBits::eOpaqueFd; + cuda_access_wait_semaphore_ = + vulkan_->get_device().createSemaphoreUnique(semaphore_create_info); + vulkan_access_signal_semaphore_ = + vulkan_->get_device().createSemaphoreUnique(semaphore_create_info); + + // import the semaphore to CUDA + cuda_access_signal_semaphore_ = import_semaphore_to_cuda(cuda_access_wait_semaphore_.get()); + vulkan_access_wait_semaphore_ = import_semaphore_to_cuda(vulkan_access_signal_semaphore_.get()); + } } UniqueCUexternalSemaphore Resource::import_semaphore_to_cuda(vk::Semaphore semaphore) { @@ -130,7 +126,7 @@ UniqueCUexternalSemaphore Resource::import_semaphore_to_cuda(vk::Semaphore semap semaphore_get_fd_info.handleType = vk::ExternalSemaphoreHandleTypeFlagBits::eOpaqueFd; UniqueValue file_handle; - file_handle.reset(device_.getSemaphoreFdKHR(semaphore_get_fd_info)); + file_handle.reset(vulkan_->get_device().getSemaphoreFdKHR(semaphore_get_fd_info)); CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC 
semaphore_handle_desc{}; semaphore_handle_desc.type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD; @@ -149,4 +145,15 @@ UniqueCUexternalSemaphore Resource::import_semaphore_to_cuda(vk::Semaphore semap return cuda_semaphore; } +void Resource::wait() { + if (fence_) { + // if the resource had been tagged with a fence, wait for it before freeing the memory + const vk::Result result = vulkan_->get_device().waitForFences(fence_, true, 100'000'000); + if (result != vk::Result::eSuccess) { + HOLOSCAN_LOG_WARN("Waiting for texture fence failed with {}", vk::to_string(result)); + } + fence_ = nullptr; + } +} + } // namespace holoscan::viz diff --git a/modules/holoviz/src/vulkan/resource.hpp b/modules/holoviz/src/vulkan/resource.hpp index 20460fb..1c2506f 100644 --- a/modules/holoviz/src/vulkan/resource.hpp +++ b/modules/holoviz/src/vulkan/resource.hpp @@ -19,6 +19,7 @@ #define MODULES_HOLOVIZ_SRC_VULKAN_RESOURCE_HPP #include +#include #include #include @@ -29,11 +30,13 @@ namespace holoscan::viz { +class Vulkan; + /// Resource base class. Can be shared between CUDA and Vulkan. Access to the resource is /// synchronized with semaphores. class Resource { public: - explicit Resource(vk::Device device, nvvk::ResourceAllocator* alloc); + explicit Resource(Vulkan* vulkan, nvvk::ResourceAllocator* alloc); Resource() = delete; virtual ~Resource(); @@ -58,6 +61,11 @@ class Resource { */ void end_access_with_cuda(CUstream stream); + /** + * Wait for the access fence to be triggered. 
+ */ + void wait(); + /// access state enum class AccessState { /// not accessed yet @@ -69,19 +77,15 @@ class Resource { }; AccessState state_ = AccessState::UNKNOWN; - /// last usage of the texture, need to sync before destroying memory + /// last usage of the resource, need to sync before destroying memory vk::Fence fence_ = nullptr; protected: - UniqueCUexternalMemory external_mem_; + std::vector external_mems_; - const vk::Device device_; + Vulkan* const vulkan_; nvvk::ResourceAllocator* const alloc_; - CudaService* cuda_service_ = nullptr; - - void destroy(); - void import_to_cuda(const std::unique_ptr& cuda_service, const nvvk::MemAllocator::MemInfo& mem_info); diff --git a/modules/holoviz/src/vulkan/texture.cpp b/modules/holoviz/src/vulkan/texture.cpp index 397b42a..99ada3e 100644 --- a/modules/holoviz/src/vulkan/texture.cpp +++ b/modules/holoviz/src/vulkan/texture.cpp @@ -18,22 +18,25 @@ #include "texture.hpp" #include +#include +#include "../cuda/convert.hpp" #include "format_util.hpp" +#include "vulkan_app.hpp" namespace holoscan::viz { -Texture::Texture(vk::Device device, nvvk::ResourceAllocator* alloc, uint32_t width, - uint32_t height, ImageFormat format) - : Resource(device, alloc), width_(width), height_(height), format_(format) {} +Texture::Texture(Vulkan* vulkan, nvvk::ResourceAllocator* alloc, uint32_t width, uint32_t height, + ImageFormat format) + : Resource(vulkan, alloc), width_(width), height_(height), format_(format) {} Texture::~Texture() { - destroy(); + wait(); // check if this texture had been imported to CUDA - if (mipmap_) { - const CudaService::ScopedPush cuda_context = cuda_service_->PushContext(); - mipmap_.reset(); + if (!mipmaps_.empty()) { + const CudaService::ScopedPush cuda_context = vulkan_->get_cuda_service()->PushContext(); + mipmaps_.clear(); } alloc_->destroy(texture_); } @@ -41,44 +44,219 @@ Texture::~Texture() { void Texture::import_to_cuda(const std::unique_ptr& cuda_service) { const CudaService::ScopedPush 
cuda_context = cuda_service->PushContext(); - const nvvk::MemAllocator::MemInfo mem_info = - alloc_->getMemoryAllocator()->getMemoryInfo(texture_.memHandle); - - // call the base class for creating the external mem and the semaphores - Resource::import_to_cuda(cuda_service, mem_info); - - uint32_t src_channels, dst_channels, component_size; - format_info(format_, &src_channels, &dst_channels, &component_size); - - CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapped_array_desc{}; - mipmapped_array_desc.arrayDesc.Width = width_; - mipmapped_array_desc.arrayDesc.Height = height_; - mipmapped_array_desc.arrayDesc.Depth = 0; - switch (component_size) { - case 1: - mipmapped_array_desc.arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT8; - break; - case 2: - mipmapped_array_desc.arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT16; - break; - case 4: - mipmapped_array_desc.arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT32; - break; - default: - throw std::runtime_error("Unhandled component size"); + if (is_yuv_format(format_)) { + // can't upload directly to YUV textures. Create a buffer, import it to CUDA and when + // uploading, copy to that buffer and update the texture from the buffer using Vulkan upload. 
+ for (uint32_t plane = 0; plane < texture_.memHandles.size(); ++plane) { + uint32_t channels, hw_channels, component_size, width_divisor, height_divisior; + format_info(format_, + &channels, + &hw_channels, + &component_size, + &width_divisor, + &height_divisior, + plane); + + const size_t size = + channels * (width_ / width_divisor) * (height_ / height_divisior) * component_size; + upload_buffers_.emplace_back( + vulkan_->create_buffer_for_cuda_interop(size, vk::BufferUsageFlagBits::eTransferSrc)); + upload_buffers_.back()->import_to_cuda(cuda_service); + } + } else { + for (uint32_t plane = 0; plane < texture_.memHandles.size(); ++plane) { + const nvvk::MemAllocator::MemInfo mem_info = + alloc_->getMemoryAllocator()->getMemoryInfo(texture_.memHandles[plane]); + + // call the base class for creating the external mem and the semaphores + Resource::import_to_cuda(cuda_service, mem_info); + + uint32_t channels, hw_channels, component_size, width_divisor, height_divisior; + format_info(format_, + &channels, + &hw_channels, + &component_size, + &width_divisor, + &height_divisior, + plane); + + CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapped_array_desc{}; + mipmapped_array_desc.arrayDesc.Width = width_ / width_divisor; + mipmapped_array_desc.arrayDesc.Height = height_ / height_divisior; + mipmapped_array_desc.arrayDesc.Depth = 0; + switch (component_size) { + case 1: + mipmapped_array_desc.arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT8; + break; + case 2: + mipmapped_array_desc.arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT16; + break; + case 4: + mipmapped_array_desc.arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT32; + break; + default: + throw std::runtime_error("Unhandled component size"); + } + mipmapped_array_desc.arrayDesc.NumChannels = hw_channels; + mipmapped_array_desc.arrayDesc.Flags = 0; + if (channels != hw_channels) { + // need surface LDST for RGB to RGBA conversion kernel + mipmapped_array_desc.arrayDesc.Flags |= CUDA_ARRAY3D_SURFACE_LDST; + } + + 
mipmapped_array_desc.numLevels = 1; + mipmapped_array_desc.offset = mem_info.offset; + + UniqueCUmipmappedArray mipmap( + [external_mem = external_mems_.back().get(), &mipmapped_array_desc] { + CUmipmappedArray mipmaped_array; + CudaCheck(cuExternalMemoryGetMappedMipmappedArray( + &mipmaped_array, external_mem, &mipmapped_array_desc)); + return mipmaped_array; + }()); + mipmaps_.push_back(std::move(mipmap)); + } + } +} + +void Texture::upload(CUstream ext_stream, const std::array& device_ptr, + const std::array& row_pitch) { + assert(device_ptr.size() == row_pitch.size()); + + if (mipmaps_.empty() && upload_buffers_.empty()) { + throw std::runtime_error("Texture had not been imported to CUDA, can't upload data."); + } + + const CudaService::ScopedPush cuda_context = vulkan_->get_cuda_service()->PushContext(); + + // select the stream to be used by CUDA operations + const CUstream stream = vulkan_->get_cuda_service()->select_cuda_stream(ext_stream); + + if (!mipmaps_.empty()) { + // start accessing the texture with CUDA + begin_access_with_cuda(stream); + } + + std::array buffers{}; + for (uint32_t plane = 0; plane < device_ptr.size(); ++plane) { + if (!device_ptr[plane]) { break; } + + uint32_t channels, hw_channels, component_size, width_divisor, height_divisior; + format_info(format_, + &channels, + &hw_channels, + &component_size, + &width_divisor, + &height_divisior, + plane); + + // the width and height might be different for each plane for Y'CbCr formats + const uint32_t width = width_ / width_divisor; + const uint32_t height = height_ / height_divisior; + + size_t src_pitch = + row_pitch[plane] != 0 ? 
row_pitch[plane] : width * channels * component_size; + + if (!mipmaps_.empty()) { + // direct upload to CUDA imported Vulkan texture by copying to CUDA array + CUarray array; + CudaCheck(cuMipmappedArrayGetLevel(&array, mipmaps_[plane].get(), 0)); + + if (channels != hw_channels) { + // three channel texture data is not hardware natively supported, convert to four channel + if ((channels != 3) || (hw_channels != 4) || (component_size != 1)) { + throw std::runtime_error("Unhandled conversion."); + } + + // if the source CUDA memory is on a different device, allocate temporary memory, copy from + // the source memory to the temporary memory and start the convert kernel using the + // temporary memory + UniqueAsyncCUdeviceptr tmp_device_ptr; + if (!vulkan_->get_cuda_service()->IsMemOnDevice(device_ptr[plane])) { + const size_t tmp_pitch = width * channels * component_size; + + // allocate temporary memory, note this is using the stream ordered memory allocator which + // is not syncing globally like the normal `cuMemAlloc` + tmp_device_ptr.reset([size = tmp_pitch * height, stream] { + CUdeviceptr device_ptr; + CudaCheck(cuMemAllocAsync(&device_ptr, size, stream)); + return std::pair(device_ptr, stream); + }()); + + CUDA_MEMCPY2D memcpy_2d{}; + memcpy_2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; + memcpy_2d.srcDevice = device_ptr[plane]; + memcpy_2d.srcPitch = src_pitch; + memcpy_2d.dstMemoryType = CU_MEMORYTYPE_DEVICE; + memcpy_2d.dstDevice = tmp_device_ptr.get().first; + memcpy_2d.dstPitch = tmp_pitch; + memcpy_2d.WidthInBytes = tmp_pitch; + memcpy_2d.Height = height; + CudaCheck(cuMemcpy2DAsync(&memcpy_2d, stream)); + + src_pitch = tmp_pitch; + } + + uint8_t alpha; + switch (format_) { + case ImageFormat::R8G8B8_UNORM: + case ImageFormat::R8G8B8_SRGB: + alpha = 0xFf; + break; + case ImageFormat::R8G8B8_SNORM: + alpha = 0x7f; + break; + default: + throw std::runtime_error("Unhandled format."); + } + + ConvertR8G8B8ToR8G8B8A8(width, + height, + tmp_device_ptr ? 
tmp_device_ptr.get().first : device_ptr[plane], + src_pitch, + array, + stream, + alpha); + } else { + // else just copy + CUDA_MEMCPY2D memcpy_2d{}; + memcpy_2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; + memcpy_2d.srcDevice = device_ptr[plane]; + memcpy_2d.srcPitch = src_pitch; + memcpy_2d.dstMemoryType = CU_MEMORYTYPE_ARRAY; + memcpy_2d.dstArray = array; + memcpy_2d.WidthInBytes = width * hw_channels * component_size; + memcpy_2d.Height = height; + CudaCheck(cuMemcpy2DAsync(&memcpy_2d, stream)); + } + } else { + // copy to Vulkan buffer which had been imported to CUDA and the use Vulkan to upload to + // texture + upload_buffers_[plane]->begin_access_with_cuda(stream); + + CUDA_MEMCPY2D memcpy_2d{}; + memcpy_2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; + memcpy_2d.srcDevice = device_ptr[plane]; + memcpy_2d.srcPitch = src_pitch; + memcpy_2d.dstMemoryType = CU_MEMORYTYPE_DEVICE; + memcpy_2d.dstDevice = upload_buffers_[plane]->device_ptr_.get(); + memcpy_2d.dstPitch = memcpy_2d.WidthInBytes = width * hw_channels * component_size; + memcpy_2d.Height = height; + CudaCheck(cuMemcpy2DAsync(&memcpy_2d, stream)); + + upload_buffers_[plane]->end_access_with_cuda(stream); + buffers[plane] = upload_buffers_[plane].get(); + } + } + + if (!mipmaps_.empty()) { + // indicate that the texture had been used by CUDA + end_access_with_cuda(stream); + } else { + vulkan_->upload_to_texture(this, buffers); } - mipmapped_array_desc.arrayDesc.NumChannels = dst_channels; - mipmapped_array_desc.arrayDesc.Flags = CUDA_ARRAY3D_SURFACE_LDST; - - mipmapped_array_desc.numLevels = 1; - mipmapped_array_desc.offset = mem_info.offset; - - mipmap_.reset([external_mem = external_mem_.get(), &mipmapped_array_desc] { - CUmipmappedArray mipmaped_array; - CudaCheck(cuExternalMemoryGetMappedMipmappedArray( - &mipmaped_array, external_mem, &mipmapped_array_desc)); - return mipmaped_array; - }()); + + CudaService::sync_with_selected_stream(ext_stream, stream); } } // namespace holoscan::viz diff --git 
a/modules/holoviz/src/vulkan/texture.hpp b/modules/holoviz/src/vulkan/texture.hpp index cc299d7..9cd851a 100644 --- a/modules/holoviz/src/vulkan/texture.hpp +++ b/modules/holoviz/src/vulkan/texture.hpp @@ -19,28 +19,52 @@ #define MODULES_HOLOVIZ_SRC_VULKAN_TEXTURE_HPP #include +#include + +#include #include "resource.hpp" #include "../holoviz/image_format.hpp" +#include "buffer.hpp" namespace holoscan::viz { class Texture : public Resource { public: - explicit Texture(vk::Device device, nvvk::ResourceAllocator* alloc, uint32_t width, - uint32_t height, ImageFormat format); + explicit Texture(Vulkan* vulkan, nvvk::ResourceAllocator* alloc, uint32_t width, uint32_t height, + ImageFormat format); Texture() = delete; virtual ~Texture(); void import_to_cuda(const std::unique_ptr& cuda_service); + /** + * Upload data from CUDA device memory to a texture which had been imported to CUDA with + * ::import_to_cuda. + * + * @param ext_stream CUDA stream to use for operations + * @param device_ptr Cuda device memory pointer for the planes + * @param row_pitch the number of bytes between each row for the planes, if zero then data is + * assumed to be contiguous in memory + */ + void upload(CUstream ext_stream, const std::array& device_ptr, + const std::array& row_pitch); + const uint32_t width_; const uint32_t height_; const ImageFormat format_; nvvk::Texture texture_{}; - UniqueCUmipmappedArray mipmap_; + std::vector mipmaps_; + std::vector> upload_buffers_; + + vk::UniqueSamplerYcbcrConversion sampler_ycbcr_conversion_; + + nvvk::DescriptorSetBindings desc_set_layout_bind_; + vk::UniqueDescriptorSetLayout desc_set_layout_; + vk::UniquePipelineLayout pipeline_layout_; + vk::UniquePipeline pipeline_; }; } // namespace holoscan::viz diff --git a/modules/holoviz/src/vulkan/vulkan_app.cpp b/modules/holoviz/src/vulkan/vulkan_app.cpp index 4510158..f729124 100644 --- a/modules/holoviz/src/vulkan/vulkan_app.cpp +++ b/modules/holoviz/src/vulkan/vulkan_app.cpp @@ -82,6 +82,7 @@ class 
Vulkan::Impl { Window* get_window() const; CudaService* get_cuda_service() const; + vk::Device get_device() const; std::vector get_surface_formats() const; std::vector get_present_modes() const; @@ -101,20 +102,17 @@ class Vulkan::Impl { void set_viewport(float x, float y, float width, float height); - Texture* create_texture_for_cuda_interop(uint32_t width, uint32_t height, ImageFormat format, - const vk::ComponentMapping& component_mapping, - vk::Filter filter, bool normalized); - Texture* create_texture(uint32_t width, uint32_t height, ImageFormat format, size_t data_size, - const void* data, const vk::ComponentMapping& component_mapping, - vk::Filter filter, bool normalized, bool export_allocation); + std::unique_ptr create_texture(Vulkan* vulkan, const CreateTextureArgs& args); - void upload_to_texture(CUdeviceptr device_ptr, size_t row_pitch, Texture* texture, - CUstream stream); - void upload_to_texture(const void* host_ptr, size_t row_pitch, Texture* texture); + void upload_to_texture(Texture* texture, const std::array& host_ptr, + const std::array& row_pitch); + void upload_to_texture(Texture* texture, const std::array& buffers); - Buffer* create_buffer_for_cuda_interop(size_t data_size, vk::BufferUsageFlags usage); - Buffer* create_buffer(size_t data_size, vk::BufferUsageFlags usage, - bool export_allocation = false, const void* data = nullptr); + std::unique_ptr create_buffer_for_cuda_interop(Vulkan* vulkan, size_t data_size, + vk::BufferUsageFlags usage); + std::unique_ptr create_buffer(Vulkan* vulkan, size_t data_size, + vk::BufferUsageFlags usage, bool export_allocation = false, + const void* data = nullptr); void upload_to_buffer(size_t data_size, CUdeviceptr device_ptr, Buffer* buffer, size_t dst_offset, CUstream stream); @@ -145,13 +143,15 @@ class Vulkan::Impl { const std::array& color, float point_size, float line_width, const nvmath::mat4f& view_matrix); - void read_framebuffer(ImageFormat fmt, uint32_t width, uint32_t height, size_t 
buffer_size, - CUdeviceptr device_ptr, CUstream stream, size_t row_pitch); + void read_framebuffer(Vulkan* vulkan, ImageFormat fmt, uint32_t width, uint32_t height, + size_t buffer_size, CUdeviceptr device_ptr, CUstream stream, + size_t row_pitch); private: void init_im_gui(const std::string& font_path, float font_size_in_pixels); void create_framebuffer_sequence(); void create_render_pass(); + void create_pipelines(); /** * Create all the framebuffers in which the image will be rendered @@ -294,10 +294,7 @@ class Vulkan::Impl { nvvk::DescriptorSetBindings desc_set_layout_bind_; vk::UniqueDescriptorSetLayout desc_set_layout_; - nvvk::DescriptorSetBindings desc_set_layout_bind_imgui_; vk::UniqueDescriptorSetLayout desc_set_layout_imgui_; - vk::UniqueDescriptorPool desc_pool_imgui_; - vk::DescriptorSet desc_set_imgui_; vk::Sampler sampler_imgui_; vk::UniquePipeline image_pipeline_; @@ -337,12 +334,7 @@ void Vulkan::Impl::setup(Window* window, const std::string& font_path, float fon window_ = window; // Initialize instance independent function pointers - { - vk::DynamicLoader dl; - PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = - dl.getProcAddress("vkGetInstanceProcAddr"); - VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); - } + VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); #ifdef NDEBUG nvvk::ContextCreateInfo context_info; @@ -374,6 +366,7 @@ void Vulkan::Impl::setup(Window* window, const std::string& font_path, float fon line_rasterization_feature_.smoothLines = true; context_info.addDeviceExtension( VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, true /*optional*/, &line_rasterization_feature_); + context_info.addDeviceExtension(VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME); // Creating Vulkan base application if (!nvvk_.vk_ctx_.initInstance(context_info)) { @@ -478,25 +471,25 @@ void Vulkan::Impl::setup(Window* window, const std::string& font_path, float fon // create the descriptor sets desc_set_layout_bind_.addBinding(SAMPLE_BINDING_COLOR, 
- VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + vk::DescriptorType::eCombinedImageSampler, 1, - VK_SHADER_STAGE_FRAGMENT_BIT); + vk::ShaderStageFlagBits::eFragment); desc_set_layout_bind_.addBinding(SAMPLE_BINDING_COLOR_U, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + vk::DescriptorType::eCombinedImageSampler, 1, - VK_SHADER_STAGE_FRAGMENT_BIT); + vk::ShaderStageFlagBits::eFragment); desc_set_layout_bind_.addBinding(SAMPLE_BINDING_COLOR_S, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + vk::DescriptorType::eCombinedImageSampler, 1, - VK_SHADER_STAGE_FRAGMENT_BIT); + vk::ShaderStageFlagBits::eFragment); desc_set_layout_bind_.addBinding(SAMPLE_BINDING_LUT, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + vk::DescriptorType::eCombinedImageSampler, 1, - VK_SHADER_STAGE_FRAGMENT_BIT); + vk::ShaderStageFlagBits::eFragment); desc_set_layout_bind_.addBinding(SAMPLE_BINDING_DEPTH, - VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + vk::DescriptorType::eCombinedImageSampler, 1, - VK_SHADER_STAGE_FRAGMENT_BIT); + vk::ShaderStageFlagBits::eFragment); // since we have one fragment shader for all the different texture types and dynamically // fetch from the right texture allow partially bound descriptor bindings (for example, when // color_u is used, color and colors_s are not bound) @@ -528,17 +521,14 @@ void Vulkan::Impl::setup(Window* window, const std::string& font_path, float fon info.maxAnisotropy = 1.0f; sampler_imgui_ = nvvk_.alloc_.acquireSampler(info); } - desc_set_layout_bind_imgui_.addBinding(SAMPLE_BINDING_COLOR, - vk::DescriptorType::eCombinedImageSampler, - 1, - vk::ShaderStageFlagBits::eFragment, - &sampler_imgui_); + nvvk::DescriptorSetBindings desc_set_layout_bind_imgui; + desc_set_layout_bind_imgui.addBinding(SAMPLE_BINDING_COLOR, + vk::DescriptorType::eCombinedImageSampler, + 1, + vk::ShaderStageFlagBits::eFragment, + &sampler_imgui_); desc_set_layout_imgui_ = - vk::UniqueDescriptorSetLayout(desc_set_layout_bind_imgui_.createLayout(device_), device_); - 
desc_pool_imgui_ = - vk::UniqueDescriptorPool(desc_set_layout_bind_imgui_.createPool(device_), device_); - desc_set_imgui_ = - nvvk::allocateDescriptorSet(device_, desc_pool_imgui_.get(), desc_set_layout_imgui_.get()); + vk::UniqueDescriptorSetLayout(desc_set_layout_bind_imgui.createLayout(device_), device_); // Push constants vk::PushConstantRange push_constant_ranges[2]; @@ -560,23 +550,6 @@ void Vulkan::Impl::setup(Window* window, const std::string& font_path, float fon image_pipeline_layout_ = device_.createPipelineLayoutUnique(create_info); } - const std::vector binding_description_float_3{ - {0, sizeof(float) * 3, vk::VertexInputRate::eVertex}}; - const std::vector attribute_description_float_3{ - {0, 0, vk::Format::eR32G32B32Sfloat, 0}}; - - // Create the Pipeline - image_pipeline_ = - create_pipeline(image_pipeline_layout_.get(), - image_shader_glsl_vert, - sizeof(image_shader_glsl_vert) / sizeof(image_shader_glsl_vert[0]), - image_shader_glsl_frag, - sizeof(image_shader_glsl_frag) / sizeof(image_shader_glsl_frag[0]), - vk::PrimitiveTopology::eTriangleList, - {}, - binding_description_float_3, - attribute_description_float_3); - // create the pipeline layout for geometry { vk::PipelineLayoutCreateInfo create_info; @@ -585,94 +558,6 @@ void Vulkan::Impl::setup(Window* window, const std::string& font_path, float fon geometry_pipeline_layout_ = device_.createPipelineLayoutUnique(create_info); } - geometry_point_pipeline_ = - create_pipeline(geometry_pipeline_layout_.get(), - geometry_shader_glsl_vert, - sizeof(geometry_shader_glsl_vert) / sizeof(geometry_shader_glsl_vert[0]), - geometry_shader_glsl_frag, - sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), - vk::PrimitiveTopology::ePointList, - {}, - binding_description_float_3, - attribute_description_float_3); - geometry_line_pipeline_ = - create_pipeline(geometry_pipeline_layout_.get(), - geometry_shader_glsl_vert, - sizeof(geometry_shader_glsl_vert) / 
sizeof(geometry_shader_glsl_vert[0]), - geometry_shader_glsl_frag, - sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), - vk::PrimitiveTopology::eLineList, - {vk::DynamicState::eLineWidth}, - binding_description_float_3, - attribute_description_float_3); - geometry_line_strip_pipeline_ = - create_pipeline(geometry_pipeline_layout_.get(), - geometry_shader_glsl_vert, - sizeof(geometry_shader_glsl_vert) / sizeof(geometry_shader_glsl_vert[0]), - geometry_shader_glsl_frag, - sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), - vk::PrimitiveTopology::eLineStrip, - {vk::DynamicState::eLineWidth}, - binding_description_float_3, - attribute_description_float_3); - geometry_triangle_pipeline_ = - create_pipeline(geometry_pipeline_layout_.get(), - geometry_shader_glsl_vert, - sizeof(geometry_shader_glsl_vert) / sizeof(geometry_shader_glsl_vert[0]), - geometry_shader_glsl_frag, - sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), - vk::PrimitiveTopology::eTriangleList, - {}, - binding_description_float_3, - attribute_description_float_3); - - const std::vector binding_description_float_3_uint8_4{ - {0, sizeof(float) * 3, vk::VertexInputRate::eVertex}, - {1, sizeof(uint8_t) * 4, vk::VertexInputRate::eVertex}}; - const std::vector attribute_description_float_3_uint8_4{ - {0, 0, vk::Format::eR32G32B32Sfloat, 0}, {1, 1, vk::Format::eR8G8B8A8Unorm, 0}}; - - geometry_point_color_pipeline_ = create_pipeline( - geometry_pipeline_layout_.get(), - geometry_color_shader_glsl_vert, - sizeof(geometry_color_shader_glsl_vert) / sizeof(geometry_color_shader_glsl_vert[0]), - geometry_shader_glsl_frag, - sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), - vk::PrimitiveTopology::ePointList, - {}, - binding_description_float_3_uint8_4, - attribute_description_float_3_uint8_4); - geometry_line_color_pipeline_ = create_pipeline( - geometry_pipeline_layout_.get(), - geometry_color_shader_glsl_vert, - 
sizeof(geometry_color_shader_glsl_vert) / sizeof(geometry_color_shader_glsl_vert[0]), - geometry_shader_glsl_frag, - sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), - vk::PrimitiveTopology::eLineList, - {vk::DynamicState::eLineWidth}, - binding_description_float_3_uint8_4, - attribute_description_float_3_uint8_4); - geometry_line_strip_color_pipeline_ = create_pipeline( - geometry_pipeline_layout_.get(), - geometry_color_shader_glsl_vert, - sizeof(geometry_color_shader_glsl_vert) / sizeof(geometry_color_shader_glsl_vert[0]), - geometry_shader_glsl_frag, - sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), - vk::PrimitiveTopology::eLineStrip, - {vk::DynamicState::eLineWidth}, - binding_description_float_3_uint8_4, - attribute_description_float_3_uint8_4); - geometry_triangle_color_pipeline_ = create_pipeline( - geometry_pipeline_layout_.get(), - geometry_color_shader_glsl_vert, - sizeof(geometry_color_shader_glsl_vert) / sizeof(geometry_color_shader_glsl_vert[0]), - geometry_shader_glsl_frag, - sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), - vk::PrimitiveTopology::eTriangleList, - {}, - binding_description_float_3_uint8_4, - attribute_description_float_3_uint8_4); - // create the pipeline layout for imgui { vk::PipelineLayoutCreateInfo create_info; @@ -683,27 +568,7 @@ void Vulkan::Impl::setup(Window* window, const std::string& font_path, float fon imgui_pipeline_layout_ = device_.createPipelineLayoutUnique(create_info); } - const std::vector binding_description_imgui{ - {0, sizeof(ImDrawVert), vk::VertexInputRate::eVertex}}; - const std::vector attribute_description_imgui{ - {0, 0, vk::Format::eR32G32Sfloat, IM_OFFSETOF(ImDrawVert, pos)}, - {1, 0, vk::Format::eR32G32Sfloat, IM_OFFSETOF(ImDrawVert, uv)}, - {2, 0, vk::Format::eR8G8B8A8Unorm, IM_OFFSETOF(ImDrawVert, col)}}; - - imgui_pipeline_ = - create_pipeline(imgui_pipeline_layout_.get(), - imgui_shader_glsl_vert, - 
sizeof(imgui_shader_glsl_vert) / sizeof(imgui_shader_glsl_vert[0]), - imgui_shader_glsl_frag, - sizeof(imgui_shader_glsl_frag) / sizeof(imgui_shader_glsl_frag[0]), - vk::PrimitiveTopology::eTriangleList, - {}, - binding_description_imgui, - attribute_description_imgui, - // There is depth buffer fighting when enabling depth writes for imgui - // probably because things are just drawn on top of each other. Therefore - // disable depth writes. - false /*depth_write_enable*/); + create_pipelines(); // ImGui initialization init_im_gui(font_path, font_size_in_pixels); @@ -720,6 +585,10 @@ CudaService* Vulkan::Impl::get_cuda_service() const { return cuda_service_.get(); } +vk::Device Vulkan::Impl::get_device() const { + return device_; +} + std::vector Vulkan::Impl::get_surface_formats() const { return fb_sequence_->get_surface_formats(); } @@ -734,6 +603,7 @@ void Vulkan::Impl::set_surface_format(SurfaceFormat surface_format) { create_framebuffer_sequence(); create_render_pass(); create_frame_buffers(); + create_pipelines(); } } } @@ -1032,12 +902,12 @@ void Vulkan::Impl::create_framebuffer_sequence() { fb_sequence_.reset(new FramebufferSequence); fb_sequence_->init(&nvvk_.alloc_, - device_, - physical_device_, - queue_gct_, - nvvk_.vk_ctx_.m_queueGCT.familyIndex, - surface_format_, - surface_.get()); + device_, + physical_device_, + queue_gct_, + nvvk_.vk_ctx_.m_queueGCT.familyIndex, + surface_format_, + surface_.get()); fb_sequence_->update(size_.width, size_.height, present_mode_, &size_); @@ -1130,6 +1000,135 @@ void Vulkan::Impl::create_render_pass() { #endif // _DEBUG } +void Vulkan::Impl::create_pipelines() { + const std::vector binding_description_float_3{ + {0, sizeof(float) * 3, vk::VertexInputRate::eVertex}}; + const std::vector attribute_description_float_3{ + {0, 0, vk::Format::eR32G32B32Sfloat, 0}}; + + // Create the Pipeline + image_pipeline_ = + create_pipeline(image_pipeline_layout_.get(), + image_shader_glsl_vert, + sizeof(image_shader_glsl_vert) / 
sizeof(image_shader_glsl_vert[0]), + image_shader_glsl_frag, + sizeof(image_shader_glsl_frag) / sizeof(image_shader_glsl_frag[0]), + vk::PrimitiveTopology::eTriangleList, + {}, + binding_description_float_3, + attribute_description_float_3); + + geometry_point_pipeline_ = + create_pipeline(geometry_pipeline_layout_.get(), + geometry_shader_glsl_vert, + sizeof(geometry_shader_glsl_vert) / sizeof(geometry_shader_glsl_vert[0]), + geometry_shader_glsl_frag, + sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), + vk::PrimitiveTopology::ePointList, + {}, + binding_description_float_3, + attribute_description_float_3); + geometry_line_pipeline_ = + create_pipeline(geometry_pipeline_layout_.get(), + geometry_shader_glsl_vert, + sizeof(geometry_shader_glsl_vert) / sizeof(geometry_shader_glsl_vert[0]), + geometry_shader_glsl_frag, + sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), + vk::PrimitiveTopology::eLineList, + {vk::DynamicState::eLineWidth}, + binding_description_float_3, + attribute_description_float_3); + geometry_line_strip_pipeline_ = + create_pipeline(geometry_pipeline_layout_.get(), + geometry_shader_glsl_vert, + sizeof(geometry_shader_glsl_vert) / sizeof(geometry_shader_glsl_vert[0]), + geometry_shader_glsl_frag, + sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), + vk::PrimitiveTopology::eLineStrip, + {vk::DynamicState::eLineWidth}, + binding_description_float_3, + attribute_description_float_3); + geometry_triangle_pipeline_ = + create_pipeline(geometry_pipeline_layout_.get(), + geometry_shader_glsl_vert, + sizeof(geometry_shader_glsl_vert) / sizeof(geometry_shader_glsl_vert[0]), + geometry_shader_glsl_frag, + sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), + vk::PrimitiveTopology::eTriangleList, + {}, + binding_description_float_3, + attribute_description_float_3); + + const std::vector binding_description_float_3_uint8_4{ + {0, sizeof(float) * 3, 
vk::VertexInputRate::eVertex}, + {1, sizeof(uint8_t) * 4, vk::VertexInputRate::eVertex}}; + const std::vector attribute_description_float_3_uint8_4{ + {0, 0, vk::Format::eR32G32B32Sfloat, 0}, {1, 1, vk::Format::eR8G8B8A8Unorm, 0}}; + + geometry_point_color_pipeline_ = create_pipeline( + geometry_pipeline_layout_.get(), + geometry_color_shader_glsl_vert, + sizeof(geometry_color_shader_glsl_vert) / sizeof(geometry_color_shader_glsl_vert[0]), + geometry_shader_glsl_frag, + sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), + vk::PrimitiveTopology::ePointList, + {}, + binding_description_float_3_uint8_4, + attribute_description_float_3_uint8_4); + geometry_line_color_pipeline_ = create_pipeline( + geometry_pipeline_layout_.get(), + geometry_color_shader_glsl_vert, + sizeof(geometry_color_shader_glsl_vert) / sizeof(geometry_color_shader_glsl_vert[0]), + geometry_shader_glsl_frag, + sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), + vk::PrimitiveTopology::eLineList, + {vk::DynamicState::eLineWidth}, + binding_description_float_3_uint8_4, + attribute_description_float_3_uint8_4); + geometry_line_strip_color_pipeline_ = create_pipeline( + geometry_pipeline_layout_.get(), + geometry_color_shader_glsl_vert, + sizeof(geometry_color_shader_glsl_vert) / sizeof(geometry_color_shader_glsl_vert[0]), + geometry_shader_glsl_frag, + sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), + vk::PrimitiveTopology::eLineStrip, + {vk::DynamicState::eLineWidth}, + binding_description_float_3_uint8_4, + attribute_description_float_3_uint8_4); + geometry_triangle_color_pipeline_ = create_pipeline( + geometry_pipeline_layout_.get(), + geometry_color_shader_glsl_vert, + sizeof(geometry_color_shader_glsl_vert) / sizeof(geometry_color_shader_glsl_vert[0]), + geometry_shader_glsl_frag, + sizeof(geometry_shader_glsl_frag) / sizeof(geometry_shader_glsl_frag[0]), + vk::PrimitiveTopology::eTriangleList, + {}, + 
binding_description_float_3_uint8_4, + attribute_description_float_3_uint8_4); + + const std::vector binding_description_imgui{ + {0, sizeof(ImDrawVert), vk::VertexInputRate::eVertex}}; + const std::vector attribute_description_imgui{ + {0, 0, vk::Format::eR32G32Sfloat, IM_OFFSETOF(ImDrawVert, pos)}, + {1, 0, vk::Format::eR32G32Sfloat, IM_OFFSETOF(ImDrawVert, uv)}, + {2, 0, vk::Format::eR8G8B8A8Unorm, IM_OFFSETOF(ImDrawVert, col)}}; + + imgui_pipeline_ = + create_pipeline(imgui_pipeline_layout_.get(), + imgui_shader_glsl_vert, + sizeof(imgui_shader_glsl_vert) / sizeof(imgui_shader_glsl_vert[0]), + imgui_shader_glsl_frag, + sizeof(imgui_shader_glsl_frag) / sizeof(imgui_shader_glsl_frag[0]), + vk::PrimitiveTopology::eTriangleList, + {}, + binding_description_imgui, + attribute_description_imgui, + // There is depth buffer fighting when enabling depth writes for imgui + // probably because things are just drawn on top of each other. Therefore + // disable depth writes. + false /*depth_write_enable*/); +} + void Vulkan::Impl::create_frame_buffers() { // Recreate the frame buffers framebuffers_.clear(); @@ -1293,69 +1292,134 @@ vk::UniquePipeline Vulkan::Impl::create_pipeline( return device_.createGraphicsPipelineUnique(pipeline_cache_.get(), generator.createInfo).value; } -Texture* Vulkan::Impl::create_texture(uint32_t width, uint32_t height, ImageFormat format, - size_t data_size, const void* data, - const vk::ComponentMapping& component_mapping, - vk::Filter filter, bool normalized, bool export_allocation) { +std::unique_ptr Vulkan::Impl::create_texture(Vulkan* vulkan, + const CreateTextureArgs& args) { if (transfer_jobs_.empty()) { throw std::runtime_error( "Transfer command buffer not set. 
Calls to create_texture() need to be enclosed by " "begin_transfer_pass() and end_transfer_pass()"); } - const vk::Format vk_format = to_vulkan_format(format); - uint32_t src_channels, dst_channels, component_size; - format_info(format, &src_channels, &dst_channels, &component_size); - - const vk::ImageCreateInfo image_create_info = - nvvk::makeImage2DCreateInfo(vk::Extent2D{width, height}, vk_format); + const vk::Format vk_format = to_vulkan_format(args.format_); + vk::ImageCreateInfo image_create_info = + nvvk::makeImage2DCreateInfo(vk::Extent2D{args.width_, args.height_}, vk_format); + if (is_multi_planar_format(args.format_)) { + image_create_info.flags = vk::ImageCreateFlagBits::eDisjoint; + } nvvk::Image image; nvvk::ResourceAllocator* allocator; - if (export_allocation) { + if (args.cuda_interop_) { allocator = &nvvk_.export_alloc_; } else { allocator = &nvvk_.alloc_; } - if (data) { - if (data_size != width * height * src_channels * component_size) { - throw std::runtime_error("The size of the data array is wrong"); - } - image = allocator->createImage( - transfer_jobs_.back().cmd_buffer_, data_size, data, image_create_info); - } else { - // the VkExternalMemoryImageCreateInfoKHR struct is appended by nvvk::ExportResourceAllocator - image = allocator->createImage(image_create_info, vk::MemoryPropertyFlagBits::eDeviceLocal); - } + // the VkExternalMemoryImageCreateInfoKHR struct is appended by nvvk::ExportResourceAllocator + // so we don't need to explicitly add it + image = allocator->createImage(image_create_info, vk::MemoryPropertyFlagBits::eDeviceLocal); // create the texture std::unique_ptr texture = - std::make_unique(device_, allocator, width, height, format); + std::make_unique(vulkan, allocator, args.width_, args.height_, args.format_); + + if (is_yuv_format(args.format_)) { + vk::SamplerYcbcrConversionCreateInfo sampler_ycbcr_conversion_create_info; + sampler_ycbcr_conversion_create_info.ycbcrModel = args.ycbcr_model_conversion_; + 
sampler_ycbcr_conversion_create_info.ycbcrRange = args.ycbcr_range_; + sampler_ycbcr_conversion_create_info.chromaFilter = args.filter_; + sampler_ycbcr_conversion_create_info.components = args.component_mapping_; + sampler_ycbcr_conversion_create_info.xChromaOffset = args.x_chroma_location_; + sampler_ycbcr_conversion_create_info.yChromaOffset = args.y_chroma_location_; + sampler_ycbcr_conversion_create_info.format = vk_format; + texture->sampler_ycbcr_conversion_ = + device_.createSamplerYcbcrConversionUnique(sampler_ycbcr_conversion_create_info); + } // create the Vulkan texture vk::SamplerCreateInfo sampler_create_info; - sampler_create_info.minFilter = filter; - sampler_create_info.magFilter = filter; + sampler_create_info.minFilter = args.filter_; + sampler_create_info.magFilter = args.filter_; sampler_create_info.mipmapMode = vk::SamplerMipmapMode::eNearest; sampler_create_info.addressModeU = vk::SamplerAddressMode::eClampToEdge; sampler_create_info.addressModeV = vk::SamplerAddressMode::eClampToEdge; sampler_create_info.addressModeW = vk::SamplerAddressMode::eClampToEdge; - sampler_create_info.maxLod = normalized ? FLT_MAX : 0; - sampler_create_info.unnormalizedCoordinates = normalized ? false : true; + sampler_create_info.maxLod = args.normalized_ ? FLT_MAX : 0; + sampler_create_info.unnormalizedCoordinates = args.normalized_ ? 
false : true; vk::ImageViewCreateInfo image_view_info = nvvk::makeImageViewCreateInfo(image.image, image_create_info); - image_view_info.components = component_mapping; + // for Y'CbCr the component mapping of SamplerYcbcrConversion and is set there, so only set it + // here for non-Y'CbCr formats + if (!is_yuv_format(args.format_)) { image_view_info.components = args.component_mapping_; } + + vk::SamplerYcbcrConversionInfo sampler_ycbcr_conversion; + if (texture->sampler_ycbcr_conversion_) { + sampler_ycbcr_conversion.conversion = texture->sampler_ycbcr_conversion_.get(); + assert(sampler_create_info.pNext == nullptr); + sampler_create_info.pNext = &sampler_ycbcr_conversion; + assert(image_view_info.pNext == nullptr); + image_view_info.pNext = &sampler_ycbcr_conversion; + } + texture->texture_ = allocator->createTexture(image, image_view_info, sampler_create_info); - // transition to shader layout - /// @todo I don't know if this is defined. Should the old layout be - /// vk::ImageLayout::eTransferDstOptimal, like it would be if we uploaded using Vulkan? 
- nvvk::cmdBarrierImageLayout(transfer_jobs_.back().cmd_buffer_, - image.image, - image_create_info.initialLayout, - vk::ImageLayout::eShaderReadOnlyOptimal); + // if this is a Y'CbCr texture we need to create a descriptor set that must be allocated with a + // layout that includes an immutable sampler + if (is_yuv_format(args.format_)) { + // start with the normal descriptor set bindings set the immutable sampler + texture->desc_set_layout_bind_ = desc_set_layout_bind_; + VkDescriptorSetLayoutBinding* bindings = texture->desc_set_layout_bind_.data(); + for (size_t index = 0; index < texture->desc_set_layout_bind_.size(); ++index) { + if (bindings[index].binding == SAMPLE_BINDING_COLOR) { + bindings[index].pImmutableSamplers = &texture->texture_.descriptor.sampler; + break; + } + } + texture->desc_set_layout_ = vk::UniqueDescriptorSetLayout( + texture->desc_set_layout_bind_.createLayout( + device_, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR), + device_); + + // Push constants + vk::PushConstantRange push_constant_ranges[2]; + push_constant_ranges[0].stageFlags = vk::ShaderStageFlagBits::eVertex; + push_constant_ranges[0].offset = 0; + push_constant_ranges[0].size = sizeof(PushConstantVertex); + push_constant_ranges[1].stageFlags = vk::ShaderStageFlagBits::eFragment; + push_constant_ranges[1].offset = sizeof(PushConstantVertex); + push_constant_ranges[1].size = sizeof(PushConstantFragment); + + // create the pipeline layout for images + { + // Creating the Pipeline Layout + vk::PipelineLayoutCreateInfo create_info; + create_info.setLayoutCount = 1; + create_info.pSetLayouts = &texture->desc_set_layout_.get(); + create_info.pushConstantRangeCount = 2; + create_info.pPushConstantRanges = push_constant_ranges; + texture->pipeline_layout_ = device_.createPipelineLayoutUnique(create_info); + } - return texture.release(); + const std::vector binding_description_float_3{ + {0, sizeof(float) * 3, vk::VertexInputRate::eVertex}}; + const std::vector 
attribute_description_float_3{ + {0, 0, vk::Format::eR32G32B32Sfloat, 0}}; + + // Create the Pipeline + texture->pipeline_ = + create_pipeline(texture->pipeline_layout_.get(), + image_shader_glsl_vert, + sizeof(image_shader_glsl_vert) / sizeof(image_shader_glsl_vert[0]), + image_shader_glsl_frag, + sizeof(image_shader_glsl_frag) / sizeof(image_shader_glsl_frag[0]), + vk::PrimitiveTopology::eTriangleList, + {}, + binding_description_float_3, + attribute_description_float_3); + } + + if (args.cuda_interop_) { texture->import_to_cuda(cuda_service_); } + + return texture; } void Vulkan::Impl::set_viewport(float x, float y, float width, float height) { @@ -1380,125 +1444,142 @@ void Vulkan::Impl::set_viewport(float x, float y, float width, float height) { cmd_buf.setScissor(0, scissor); } -Texture* Vulkan::Impl::create_texture_for_cuda_interop( - uint32_t width, uint32_t height, ImageFormat format, - const vk::ComponentMapping& component_mapping, vk::Filter filter, bool normalized) { +void Vulkan::Impl::upload_to_texture(Texture* texture, const std::array& host_ptr, + const std::array& row_pitch) { if (transfer_jobs_.empty()) { throw std::runtime_error( - "Transfer command buffer not set. Calls to create_texture_for_cuda_interop() " - "need to be enclosed by " - "begin_transfer_pass() and " - "end_transfer_pass()"); + "Transfer command buffer not set. 
Calls to upload_to_texture() need to be enclosed by " + "begin_transfer_pass() and end_transfer_pass()"); } - std::unique_ptr texture; - texture.reset(create_texture(width, - height, - format, - 0, - nullptr, - component_mapping, - filter, - normalized, - true /*export_allocation*/)); - - texture->import_to_cuda(cuda_service_); - - return texture.release(); -} - -void Vulkan::Impl::upload_to_texture(CUdeviceptr device_ptr, size_t row_pitch, Texture* texture, - CUstream ext_stream) { - if (!texture->mipmap_) { - throw std::runtime_error("Texture had not been imported to CUDA, can't upload data."); + if ((texture->state_ != Texture::AccessState::VULKAN) && + (texture->state_ != Texture::AccessState::UNKNOWN)) { + throw std::runtime_error( + "When uploading to texture, the texture should be in Vulkan " + "or unknown state"); } - const CudaService::ScopedPush cuda_context = cuda_service_->PushContext(); - - // select the stream to be used by CUDA operations - const CUstream stream = cuda_service_->select_cuda_stream(ext_stream); - - // start accessing the texture with CUDA - texture->begin_access_with_cuda(stream); - - CUarray array; - CudaCheck(cuMipmappedArrayGetLevel(&array, texture->mipmap_.get(), 0)); - - uint32_t src_channels, dst_channels, component_size; - format_info(texture->format_, &src_channels, &dst_channels, &component_size); - size_t src_pitch = row_pitch != 0 ? 
row_pitch : texture->width_ * src_channels * component_size; + const vk::CommandBuffer cmd_buf = transfer_jobs_.back().cmd_buffer_; - if (src_channels != dst_channels) { - // three channel texture data is not hardware natively supported, convert to four channel - if ((src_channels != 3) || (dst_channels != 4) || (component_size != 1)) { - throw std::runtime_error("Unhandled conversion."); + // transition the image to eTransferDstOptimal so we can copy to it from a buffer + vk::ImageSubresourceRange subresource_range; + if (is_multi_planar_format(texture->format_)) { + for (uint32_t plane = 0; plane < host_ptr.size(); ++plane) { + if (!host_ptr[plane]) { continue; } + subresource_range.aspectMask |= + vk::ImageAspectFlagBits(int(vk::ImageAspectFlagBits::ePlane0) << plane); } + } else if (is_depth_format(texture->format_)) { + subresource_range.aspectMask = vk::ImageAspectFlagBits::eDepth; + } else { + subresource_range.aspectMask = vk::ImageAspectFlagBits::eColor; + } + subresource_range.layerCount = 1; + subresource_range.levelCount = 1; - // if the source CUDA memory is on a different device, allocate temporary memory, copy from - // the source memory to the temporary memory and start the convert kernel using the temporary - // memory - UniqueAsyncCUdeviceptr tmp_device_ptr; - if (!cuda_service_->IsMemOnDevice(device_ptr)) { - const size_t tmp_pitch = texture->width_ * src_channels * component_size; - - // allocate temporary memory, note this is using the stream ordered memory allocator which - // is not syncing globally like the normal `cuMemAlloc` - tmp_device_ptr.reset([tmp_pitch, texture, stream] { - CUdeviceptr device_ptr; - CudaCheck(cuMemAllocAsync(&device_ptr, tmp_pitch * texture->height_, stream)); - return std::pair(device_ptr, stream); - }()); - - CUDA_MEMCPY2D memcpy_2d{}; - memcpy_2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; - memcpy_2d.srcDevice = device_ptr; - memcpy_2d.srcPitch = src_pitch; - memcpy_2d.dstMemoryType = CU_MEMORYTYPE_DEVICE; - 
memcpy_2d.dstDevice = tmp_device_ptr.get().first; - memcpy_2d.dstPitch = tmp_pitch; - memcpy_2d.WidthInBytes = tmp_pitch; - memcpy_2d.Height = texture->height_; - CudaCheck(cuMemcpy2DAsync(&memcpy_2d, stream)); - - device_ptr = tmp_device_ptr.get().first; - src_pitch = tmp_pitch; - } + nvvk::cmdBarrierImageLayout(cmd_buf, + texture->texture_.image, + vk::ImageLayout::eUndefined, + vk::ImageLayout::eTransferDstOptimal, + subresource_range); - uint8_t alpha; - switch (texture->format_) { - case ImageFormat::R8G8B8_UNORM: - case ImageFormat::R8G8B8_SRGB: - alpha = 0xFf; - break; - case ImageFormat::R8G8B8_SNORM: - alpha = 0x7f; - break; - default: - throw std::runtime_error("Unhandled format."); + for (uint32_t plane = 0; plane < host_ptr.size(); ++plane) { + if (!host_ptr[plane]) { continue; } + + uint32_t channels, hw_channels, component_size, width_divisor, height_divisior; + format_info(texture->format_, + &channels, + &hw_channels, + &component_size, + &width_divisor, + &height_divisior, + plane); + + // the width and height might be different for each plane for Y'CbCr formats + const uint32_t width = texture->width_ / width_divisor; + const uint32_t height = texture->height_ / height_divisior; + + vk::Offset3D offset; + vk::ImageSubresourceLayers image_subresource_layers; + if (is_multi_planar_format(texture->format_)) { + image_subresource_layers.aspectMask = + vk::ImageAspectFlagBits(int(vk::ImageAspectFlagBits::ePlane0) << plane); + } else if (is_depth_format(texture->format_)) { + image_subresource_layers.aspectMask = vk::ImageAspectFlagBits::eDepth; + } else { + image_subresource_layers.aspectMask = vk::ImageAspectFlagBits::eColor; + } + image_subresource_layers.layerCount = 1; + + const uint32_t src_pitch = width * channels * component_size; + const uint32_t dst_pitch = width * hw_channels * component_size; + const vk::DeviceSize data_size = dst_pitch * height; + + void* mapping = + nvvk_.alloc_.getStaging()->cmdToImage(cmd_buf, + texture->texture_.image, 
+ VkOffset3D(offset), + VkExtent3D{width, height, 1}, + VkImageSubresourceLayers(image_subresource_layers), + data_size, + nullptr); + + if (channels != hw_channels) { + // three channel texture data is not hardware natively supported, convert to four channel + if ((channels != 3) || (hw_channels != 4) || (component_size != 1)) { + throw std::runtime_error("Unhandled conversion."); + } + const uint8_t* src = reinterpret_cast(host_ptr[plane]); + uint32_t* dst = reinterpret_cast(mapping); + uint8_t alpha; + switch (texture->format_) { + case ImageFormat::R8G8B8_UNORM: + case ImageFormat::R8G8B8_SRGB: + alpha = 0xFf; + break; + case ImageFormat::R8G8B8_SNORM: + alpha = 0x7f; + break; + default: + throw std::runtime_error("Unhandled format."); + } + for (uint32_t y = 0; y < height; ++y) { + for (uint32_t x = 0; x < width; ++x) { + const uint8_t data[4]{src[0], src[1], src[2], alpha}; + *dst = *reinterpret_cast(&data); + src += 3; + ++dst; + } + if (row_pitch[plane] != 0) { src += row_pitch[plane] - src_pitch; } + } + } else { + if ((row_pitch[plane] == 0) || (row_pitch[plane] == dst_pitch)) { + // contiguous copy + memcpy(mapping, host_ptr[plane], data_size); + } else { + // source and destination pitch is different, copy row by row + const uint8_t* src = reinterpret_cast(host_ptr[plane]); + uint8_t* dst = reinterpret_cast(mapping); + for (uint32_t y = 0; y < height; ++y) { + memcpy(dst, src, dst_pitch); + src += row_pitch[plane]; + dst += dst_pitch; + } + } } - - ConvertR8G8B8ToR8G8B8A8( - texture->width_, texture->height_, device_ptr, src_pitch, array, stream, alpha); - } else { - // else just copy - CUDA_MEMCPY2D memcpy_2d{}; - memcpy_2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; - memcpy_2d.srcDevice = device_ptr; - memcpy_2d.srcPitch = src_pitch; - memcpy_2d.dstMemoryType = CU_MEMORYTYPE_ARRAY; - memcpy_2d.dstArray = array; - memcpy_2d.WidthInBytes = texture->width_ * dst_channels * component_size; - memcpy_2d.Height = texture->height_; - 
CudaCheck(cuMemcpy2DAsync(&memcpy_2d, stream)); } - // indicate that the texture had been used by CUDA - texture->end_access_with_cuda(stream); + // Setting final image layout + nvvk::cmdBarrierImageLayout(cmd_buf, + texture->texture_.image, + vk::ImageLayout::eTransferDstOptimal, + vk::ImageLayout::eShaderReadOnlyOptimal); - CudaService::sync_with_selected_stream(ext_stream, stream); + // no need to set the texture state here, the transfer command buffer submission is + // always synchronized to the render command buffer submission. } -void Vulkan::Impl::upload_to_texture(const void* host_ptr, size_t row_pitch, Texture* texture) { +void Vulkan::Impl::upload_to_texture(Texture* texture, const std::array& buffers) { if (transfer_jobs_.empty()) { throw std::runtime_error( "Transfer command buffer not set. Calls to upload_to_texture() need to be enclosed by " @@ -1514,11 +1595,19 @@ void Vulkan::Impl::upload_to_texture(const void* host_ptr, size_t row_pitch, Tex const vk::CommandBuffer cmd_buf = transfer_jobs_.back().cmd_buffer_; - // Copy buffer to image + // transition the image to eTransferDstOptimal so we can copy to it from a buffer vk::ImageSubresourceRange subresource_range; - subresource_range.aspectMask = vk::ImageAspectFlagBits::eColor; - subresource_range.baseArrayLayer = 0; - subresource_range.baseMipLevel = 0; + if (is_multi_planar_format(texture->format_)) { + for (uint32_t plane = 0; plane < buffers.size(); ++plane) { + if (!buffers[plane]) { continue; } + subresource_range.aspectMask |= + vk::ImageAspectFlagBits(int(vk::ImageAspectFlagBits::ePlane0) << plane); + } + } else if (is_depth_format(texture->format_)) { + subresource_range.aspectMask = vk::ImageAspectFlagBits::eDepth; + } else { + subresource_range.aspectMask = vk::ImageAspectFlagBits::eColor; + } subresource_range.layerCount = 1; subresource_range.levelCount = 1; @@ -1528,72 +1617,46 @@ void Vulkan::Impl::upload_to_texture(const void* host_ptr, size_t row_pitch, Tex 
vk::ImageLayout::eTransferDstOptimal, subresource_range); - vk::Offset3D offset; - vk::ImageSubresourceLayers subresource; - subresource.aspectMask = vk::ImageAspectFlagBits::eColor; - subresource.layerCount = 1; - - uint32_t src_channels, dst_channels, component_size; - format_info(texture->format_, &src_channels, &dst_channels, &component_size); - - const uint32_t src_pitch = texture->width_ * src_channels * component_size; - const uint32_t dst_pitch = texture->width_ * dst_channels * component_size; - const vk::DeviceSize data_size = dst_pitch * texture->height_; - - void* mapping = - nvvk_.alloc_.getStaging()->cmdToImage(cmd_buf, - texture->texture_.image, - VkOffset3D(offset), - VkExtent3D{texture->width_, texture->height_, 1}, - VkImageSubresourceLayers(subresource), - data_size, - nullptr); - - if (src_channels != dst_channels) { - // three channel texture data is not hardware natively supported, convert to four channel - if ((src_channels != 3) || (dst_channels != 4) || (component_size != 1)) { - throw std::runtime_error("Unhandled conversion."); - } - const uint8_t* src = reinterpret_cast(host_ptr); - uint32_t* dst = reinterpret_cast(mapping); - uint8_t alpha; - switch (texture->format_) { - case ImageFormat::R8G8B8_UNORM: - case ImageFormat::R8G8B8_SRGB: - alpha = 0xFf; - break; - case ImageFormat::R8G8B8_SNORM: - alpha = 0x7f; - break; - default: - throw std::runtime_error("Unhandled format."); - } - for (uint32_t y = 0; y < texture->height_; ++y) { - for (uint32_t x = 0; x < texture->width_; ++x) { - const uint8_t data[4]{src[0], src[1], src[2], alpha}; - *dst = *reinterpret_cast(&data); - src += 3; - ++dst; - } - if (row_pitch != 0) { src += row_pitch - src_pitch; } - } - } else { - if ((row_pitch == 0) || (row_pitch == dst_pitch)) { - // contiguous copy - memcpy(mapping, host_ptr, data_size); + for (uint32_t plane = 0; plane < buffers.size(); ++plane) { + if (!buffers[plane]) { continue; } + + uint32_t channels, hw_channels, component_size, 
width_divisor, height_divisior; + format_info(texture->format_, + &channels, + &hw_channels, + &component_size, + &width_divisor, + &height_divisior, + plane); + + // the width and height might be different for each plane for Y'CbCr formats + const uint32_t width = texture->width_ / width_divisor; + const uint32_t height = texture->height_ / height_divisior; + + // now copy from the buffer to the image + vk::ImageSubresourceLayers image_subresource_layers; + if (is_multi_planar_format(texture->format_)) { + image_subresource_layers.aspectMask = + vk::ImageAspectFlagBits(int(vk::ImageAspectFlagBits::ePlane0) << plane); + } else if (is_depth_format(texture->format_)) { + image_subresource_layers.aspectMask = vk::ImageAspectFlagBits::eDepth; } else { - // source and destination pitch is different, copy row by row - const uint8_t* src = reinterpret_cast(host_ptr); - uint8_t* dst = reinterpret_cast(mapping); - for (uint32_t y = 0; y < texture->height_; ++y) { - memcpy(dst, src, dst_pitch); - src += row_pitch; - dst += dst_pitch; - } + image_subresource_layers.aspectMask = vk::ImageAspectFlagBits::eColor; } + image_subresource_layers.layerCount = 1; + + vk::BufferImageCopy buffer_image_copy; + buffer_image_copy.imageSubresource = image_subresource_layers; + buffer_image_copy.imageExtent = vk::Extent3D{width, height, 1}; + + cmd_buf.copyBufferToImage(vk::Buffer(buffers[plane]->buffer_.buffer), + vk::Image(texture->texture_.image), + vk::ImageLayout::eTransferDstOptimal, + 1, + &buffer_image_copy); } - // Setting final image layout + // transition back to shader optimal layout since we will now use that image for rendering nvvk::cmdBarrierImageLayout(cmd_buf, texture->texture_.image, vk::ImageLayout::eTransferDstOptimal, @@ -1603,8 +1666,9 @@ void Vulkan::Impl::upload_to_texture(const void* host_ptr, size_t row_pitch, Tex // always synchronized to the render command buffer submission. 
} -Buffer* Vulkan::Impl::create_buffer(size_t data_size, vk::BufferUsageFlags usage, - bool export_allocation, const void* data) { +std::unique_ptr Vulkan::Impl::create_buffer(Vulkan* vulkan, size_t data_size, + vk::BufferUsageFlags usage, + bool export_allocation, const void* data) { nvvk::ResourceAllocator* allocator; if (export_allocation) { /// @TODO Use the dedicted allocator. Without it there is corruption with the depth map @@ -1616,7 +1680,7 @@ Buffer* Vulkan::Impl::create_buffer(size_t data_size, vk::BufferUsageFlags usage allocator = &nvvk_.alloc_; } - std::unique_ptr buffer(new Buffer(device_, allocator, data_size)); + std::unique_ptr buffer(new Buffer(vulkan, allocator, data_size)); if (data) { if (transfer_jobs_.empty()) { throw std::runtime_error( @@ -1631,17 +1695,19 @@ Buffer* Vulkan::Impl::create_buffer(size_t data_size, vk::BufferUsageFlags usage static_cast(data_size), usage, vk::MemoryPropertyFlagBits::eDeviceLocal); } - return buffer.release(); + return buffer; } -Buffer* Vulkan::Impl::create_buffer_for_cuda_interop(size_t data_size, vk::BufferUsageFlags usage) { - std::unique_ptr buffer; - buffer.reset(create_buffer(data_size, usage, true /*export_allocation*/)); +std::unique_ptr Vulkan::Impl::create_buffer_for_cuda_interop(Vulkan* vulkan, + size_t data_size, + vk::BufferUsageFlags usage) { + std::unique_ptr buffer = + create_buffer(vulkan, data_size, usage, true /*export_allocation*/); // import buffer to CUDA buffer->import_to_cuda(cuda_service_); - return buffer.release(); + return buffer; } void Vulkan::Impl::upload_to_buffer(size_t data_size, CUdeviceptr device_ptr, Buffer* buffer, @@ -1693,6 +1759,14 @@ void Vulkan::Impl::draw_texture(Texture* texture, Texture* depth_texture, Textur PushConstantFragment push_constants; push_constants.flags = 0; + // if this is a Y'CbCr texture it has its own pipeline, else use the default image pipeline + const vk::Pipeline pipeline = + texture->pipeline_ ? 
texture->pipeline_.get() : image_pipeline_.get(); + const vk::PipelineLayout pipeline_layout = + texture->pipeline_layout_ ? texture->pipeline_layout_.get() : image_pipeline_layout_.get(); + const nvvk::DescriptorSetBindings& desc_set_layout_bind = + texture->desc_set_layout_ ? texture->desc_set_layout_bind_ : desc_set_layout_bind_; + // update descriptor sets std::vector writes; uint32_t color_sample_binding = SAMPLE_BINDING_COLOR; @@ -1712,34 +1786,34 @@ void Vulkan::Impl::draw_texture(Texture* texture, Texture* depth_texture, Textur push_constants.flags |= PUSH_CONSTANT_FRAGMENT_FLAG_LUT; } writes.emplace_back( - desc_set_layout_bind_.makeWrite(nullptr, SAMPLE_BINDING_LUT, &lut->texture_.descriptor)); + desc_set_layout_bind.makeWrite(nullptr, SAMPLE_BINDING_LUT, &lut->texture_.descriptor)); } else { push_constants.flags |= PUSH_CONSTANT_FRAGMENT_FLAG_COLOR; } texture->access_with_vulkan(nvvk_.batch_submission_); - writes.emplace_back(desc_set_layout_bind_.makeWrite( - nullptr, color_sample_binding, &texture->texture_.descriptor)); + writes.emplace_back( + desc_set_layout_bind.makeWrite(nullptr, color_sample_binding, &texture->texture_.descriptor)); if (depth_texture) { depth_texture->access_with_vulkan(nvvk_.batch_submission_); - writes.emplace_back(desc_set_layout_bind_.makeWrite( + writes.emplace_back(desc_set_layout_bind.makeWrite( nullptr, SAMPLE_BINDING_DEPTH, &depth_texture->texture_.descriptor)); push_constants.flags |= PUSH_CONSTANT_FRAGMENT_FLAG_DEPTH; } - cmd_buf.bindPipeline(vk::PipelineBindPoint::eGraphics, image_pipeline_.get()); + cmd_buf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); cmd_buf.pushDescriptorSetKHR(vk::PipelineBindPoint::eGraphics, - image_pipeline_layout_.get(), + pipeline_layout, 0, static_cast(writes.size()), writes.data()); // push the constants push_constants.opacity = opacity; - cmd_buf.pushConstants(image_pipeline_layout_.get(), + cmd_buf.pushConstants(pipeline_layout, vk::ShaderStageFlagBits::eFragment, 
sizeof(PushConstantVertex), sizeof(PushConstantFragment), @@ -1747,7 +1821,7 @@ void Vulkan::Impl::draw_texture(Texture* texture, Texture* depth_texture, Textur PushConstantVertex push_constant_vertex; push_constant_vertex.matrix = view_matrix; - cmd_buf.pushConstants(image_pipeline_layout_.get(), + cmd_buf.pushConstants(pipeline_layout, vk::ShaderStageFlagBits::eVertex, 0, sizeof(PushConstantVertex), @@ -1991,9 +2065,9 @@ void Vulkan::Impl::draw_indexed(vk::PrimitiveTopology topology, view_matrix); } -void Vulkan::Impl::read_framebuffer(ImageFormat fmt, uint32_t width, uint32_t height, - size_t buffer_size, CUdeviceptr device_ptr, CUstream ext_stream, - size_t row_pitch) { +void Vulkan::Impl::read_framebuffer(Vulkan* vulkan, ImageFormat fmt, uint32_t width, + uint32_t height, size_t buffer_size, CUdeviceptr device_ptr, + CUstream ext_stream, size_t row_pitch) { ReadTransferType transfer_type; vk::Image image; vk::Format image_format; @@ -2025,22 +2099,23 @@ void Vulkan::Impl::read_framebuffer(ImageFormat fmt, uint32_t width, uint32_t he TransferJob& read_job = read_transfer_jobs_[transfer_type]; const vk::Format out_vk_format = to_vulkan_format(fmt); - uint32_t src_channels, dst_channels, component_size; - format_info(fmt, &src_channels, &dst_channels, &component_size); + uint32_t channels, hw_channels, component_size; + format_info(fmt, &channels, &hw_channels, &component_size); // limit size to actual framebuffer size const uint32_t read_width = std::min(size_.width, width); const uint32_t read_height = std::min(size_.height, height); - const size_t data_size = read_width * read_height * dst_channels * component_size; + const size_t data_size = read_width * read_height * hw_channels * component_size; if (buffer_size < data_size) { throw std::runtime_error("The size of the buffer is too small"); } // allocate the transfer buffer if needed - const size_t src_data_size = read_width * read_height * src_channels * component_size; + const size_t src_data_size = 
read_width * read_height * channels * component_size; if (!read_transfer_buffers_[transfer_type] || (read_transfer_buffers_[transfer_type]->size_ < src_data_size)) { - read_transfer_buffers_[transfer_type].reset( - create_buffer_for_cuda_interop(src_data_size, vk::BufferUsageFlagBits::eTransferDst)); + read_transfer_buffers_[transfer_type].reset(); + read_transfer_buffers_[transfer_type] = create_buffer_for_cuda_interop( + vulkan, src_data_size, vk::BufferUsageFlagBits::eTransferDst); } Buffer* read_transfer_buffer = read_transfer_buffers_[transfer_type].get(); @@ -2121,7 +2196,7 @@ void Vulkan::Impl::read_framebuffer(ImageFormat fmt, uint32_t width, uint32_t he // select the stream to be used by CUDA operations const CUstream stream = cuda_service_->select_cuda_stream(ext_stream); - const size_t dst_pitch = row_pitch ? row_pitch : width * dst_channels * component_size; + const size_t dst_pitch = row_pitch ? row_pitch : width * hw_channels * component_size; // synchronize with the Vulkan copy read_transfer_buffer->begin_access_with_cuda(stream); @@ -2151,7 +2226,7 @@ void Vulkan::Impl::read_framebuffer(ImageFormat fmt, uint32_t width, uint32_t he ConvertB8G8R8A8ToR8G8B8A8(read_width, read_height, read_transfer_buffer->device_ptr_.get(), - read_width * src_channels * component_size, + read_width * channels * component_size, dst_device_ptr, dst_pitch, stream); @@ -2173,7 +2248,7 @@ void Vulkan::Impl::read_framebuffer(ImageFormat fmt, uint32_t width, uint32_t he CUDA_MEMCPY2D memcpy2d{}; memcpy2d.srcMemoryType = CU_MEMORYTYPE_DEVICE; memcpy2d.srcDevice = read_transfer_buffer->device_ptr_.get(); - memcpy2d.srcPitch = read_width * src_channels * component_size; + memcpy2d.srcPitch = read_width * channels * component_size; memcpy2d.dstMemoryType = CU_MEMORYTYPE_DEVICE; memcpy2d.dstDevice = device_ptr; memcpy2d.dstPitch = dst_pitch; @@ -2206,6 +2281,10 @@ CudaService* Vulkan::get_cuda_service() const { return impl_->get_cuda_service(); } +vk::Device Vulkan::get_device() 
const { + return impl_->get_device(); +} + std::vector Vulkan::get_surface_formats() const { return impl_->get_surface_formats(); } @@ -2246,48 +2325,27 @@ void Vulkan::set_viewport(float x, float y, float width, float height) { impl_->set_viewport(x, y, width, height); } -Texture* Vulkan::create_texture_for_cuda_interop(uint32_t width, uint32_t height, - ImageFormat format, - const vk::ComponentMapping& component_mapping, - vk::Filter filter, bool normalized) { - return impl_->create_texture_for_cuda_interop( - width, height, format, component_mapping, filter, normalized); +std::unique_ptr Vulkan::create_texture(const CreateTextureArgs& args) { + return impl_->create_texture(this, args); } -Texture* Vulkan::create_texture(uint32_t width, uint32_t height, ImageFormat format, - size_t data_size, const void* data, - const vk::ComponentMapping& component_mapping, vk::Filter filter, - bool normalized) { - return impl_->create_texture(width, - height, - format, - data_size, - data, - component_mapping, - filter, - normalized, - false /*export_allocation*/); +void Vulkan::upload_to_texture(Texture* texture, const std::array& host_ptr, + const std::array& row_pitch) { + impl_->upload_to_texture(texture, host_ptr, row_pitch); } -void Vulkan::destroy_texture(Texture* texture) { - delete texture; +void Vulkan::upload_to_texture(Texture* texture, const std::array& buffers) { + impl_->upload_to_texture(texture, buffers); } -void Vulkan::upload_to_texture(CUdeviceptr device_ptr, size_t row_pitch, Texture* texture, - CUstream stream) { - impl_->upload_to_texture(device_ptr, row_pitch, texture, stream); +std::unique_ptr Vulkan::create_buffer(size_t data_size, const void* data, + vk::BufferUsageFlags usage) { + return impl_->create_buffer(this, data_size, usage, false, data); } -void Vulkan::upload_to_texture(const void* host_ptr, size_t row_pitch, Texture* texture) { - impl_->upload_to_texture(host_ptr, row_pitch, texture); -} - -Buffer* Vulkan::create_buffer(size_t data_size, 
const void* data, vk::BufferUsageFlags usage) { - return impl_->create_buffer(data_size, usage, false, data); -} - -Buffer* Vulkan::create_buffer_for_cuda_interop(size_t data_size, vk::BufferUsageFlags usage) { - return impl_->create_buffer_for_cuda_interop(data_size, usage); +std::unique_ptr Vulkan::create_buffer_for_cuda_interop(size_t data_size, + vk::BufferUsageFlags usage) { + return impl_->create_buffer_for_cuda_interop(this, data_size, usage); } void Vulkan::upload_to_buffer(size_t data_size, CUdeviceptr device_ptr, Buffer* buffer, @@ -2299,10 +2357,6 @@ void Vulkan::upload_to_buffer(size_t data_size, const void* data, const Buffer* return impl_->upload_to_buffer(data_size, data, buffer); } -void Vulkan::destroy_buffer(Buffer* buffer) { - delete buffer; -} - void Vulkan::draw_texture(Texture* texture, Texture* depth_texture, Texture* lut, float opacity, const nvmath::mat4f& view_matrix) { impl_->draw_texture(texture, depth_texture, lut, opacity, view_matrix); @@ -2351,7 +2405,7 @@ void Vulkan::draw_indexed(vk::PrimitiveTopology topology, void Vulkan::read_framebuffer(ImageFormat fmt, uint32_t width, uint32_t height, size_t buffer_size, CUdeviceptr buffer, CUstream stream, size_t row_pitch) { - impl_->read_framebuffer(fmt, width, height, buffer_size, buffer, stream, row_pitch); + impl_->read_framebuffer(this, fmt, width, height, buffer_size, buffer, stream, row_pitch); } } // namespace holoscan::viz diff --git a/modules/holoviz/src/vulkan/vulkan_app.hpp b/modules/holoviz/src/vulkan/vulkan_app.hpp index 007cc1b..5c64d6e 100644 --- a/modules/holoviz/src/vulkan/vulkan_app.hpp +++ b/modules/holoviz/src/vulkan/vulkan_app.hpp @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -75,6 +74,11 @@ class Vulkan { */ CudaService* get_cuda_service() const; + /** + * @return the Vulkan device + */ + vk::Device get_device() const; + /** * Get the supported surface formats. 
* @@ -146,87 +150,72 @@ class Vulkan { void set_viewport(float x, float y, float width, float height); /** - * Create a texture to be used for interop with CUDA, see ::upload_to_texture. - * Destroy with ::destroy_texture. - * - * @param width, height size - * @param format texture format - * @param component_mapping component mapping - * @param filter texture filter - * @param normalized if true, then texture coordinates are normalize (0...1), - * else (0...width, 0...height) - * @return created texture object + * Arguments for create_texture() */ - Texture* create_texture_for_cuda_interop(uint32_t width, uint32_t height, ImageFormat format, - const vk::ComponentMapping& component_mapping, - vk::Filter filter = vk::Filter::eLinear, - bool normalized = true); + struct CreateTextureArgs { + uint32_t width_; //< texture width + uint32_t height_; //< texture height + ImageFormat format_; //< texture format + vk::ComponentMapping component_mapping_; //< component mapping + vk::Filter filter_ = vk::Filter::eLinear; //< texture filter + bool normalized_ = true; //< if true, then texture coordinates are normalize (0...1), else + //< (0...width, 0...height) + bool cuda_interop_ = false; //< used for interop with CUDA + vk::SamplerYcbcrModelConversion ycbcr_model_conversion_; ///< YCbCr model conversion + vk::SamplerYcbcrRange ycbcr_range_; ///< YCbCR range + vk::ChromaLocation x_chroma_location_; ///< chroma location in x direction for formats which + ///< are chroma downsampled in width (420 and 422) + vk::ChromaLocation y_chroma_location_; ///< chroma location in y direction for formats which + ///< are chroma downsampled in height (420) + }; /** - * Create a Texture using host data. Destroy with ::destroy_texture. + * Create a Texture using host data. 
* - * @param width, height size - * @param format texture format - * @param data_size data size in bytes - * @param data texture data - * @param component_mapping component mapping - * @param filter texture filter - * @param normalized if true, then texture coordinates are normalize (0...1), - * else (0...width, 0...height) + * @param args arguments * @return created texture object */ - Texture* create_texture(uint32_t width, uint32_t height, ImageFormat format, size_t data_size, - const void* data, const vk::ComponentMapping& component_mapping, - vk::Filter filter = vk::Filter::eLinear, bool normalized = true); + std::unique_ptr create_texture(const CreateTextureArgs& args); /** - * Destroy a texture created with ::create_texture_for_cuda_interop or ::create_texture. - * - * @param texture texture to destroy - */ - void destroy_texture(Texture* texture); - - /** - * Upload data from CUDA device memory to a texture created with ::create_texture_for_cuda_interop + * Upload data from host memory to a texture created with ::create_texture * - * @param device_ptr CUDA device memory - * @param row_pitch the number of bytes between each row, if zero then data is assumed to be - * contiguous in memory * @param texture texture to be updated - * @param ext_stream CUDA stream to use for operations + * @param host_ptr data in host memory to upload for the planes + * @param row_pitch the number of bytes between each row for the planes, if zero then data is + * assumed to be contiguous in memory */ - void upload_to_texture(CUdeviceptr device_ptr, size_t row_pitch, Texture* texture, - CUstream ext_stream); + void upload_to_texture(Texture* texture, const std::array& host_ptr, + const std::array& row_pitch); /** - * Upload data from host memory to a texture created with ::create_texture + * Upload data from a Buffer to a texture * - * @param host_ptr data to upload in host memory - * @param row_pitch the number of bytes between each row, if zero then data is assumed to be - * 
contiguous in memory - * @param texture texture to be updated + * @param texture texture to be updated + * @param buffers data to be uploaded for each plane */ - void upload_to_texture(const void* host_ptr, size_t row_pitch, Texture* texture); + void upload_to_texture(Texture* texture, const std::array& buffers); /** * Create a vertex or index buffer to be used for interop with CUDA, see ::upload_texture. - * Destroy with ::destroy_buffer. * * @param data_size size of the buffer in bytes * @param usage buffer usage * @return created buffer */ - Buffer* create_buffer_for_cuda_interop(size_t data_size, vk::BufferUsageFlags usage); + std::unique_ptr create_buffer_for_cuda_interop(size_t data_size, + vk::BufferUsageFlags usage); /** - * Create a vertex or index buffer and initialize with data. Destroy with ::destroy_buffer. + * Create a vertex or index buffer and initialize with data. * * @param data_size size of the buffer in bytes * @param data host size data to initialize buffer with or nullptr * @param usage buffer usage * @return created buffer */ - Buffer* create_buffer(size_t data_size, const void* data, vk::BufferUsageFlags usage); + std::unique_ptr create_buffer(size_t data_size, const void* data, + vk::BufferUsageFlags usage); /** * Upload data from CUDA device memory to a buffer created with ::create_buffer_for_cuda_interop @@ -249,13 +238,6 @@ class Vulkan { */ void upload_to_buffer(size_t data_size, const void* data, const Buffer* buffer); - /** - * Destroy a buffer created with ::CreateBuffer. - * - * @param buffer buffer to destroy - */ - void destroy_buffer(Buffer* buffer); - /** * Draw a texture with an optional depth texture and color lookup table. 
* diff --git a/modules/holoviz/tests/functional/image_layer_test.cpp b/modules/holoviz/tests/functional/image_layer_test.cpp index 922a8c2..b70f2e6 100644 --- a/modules/holoviz/tests/functional/image_layer_test.cpp +++ b/modules/holoviz/tests/functional/image_layer_test.cpp @@ -124,6 +124,16 @@ std::ostream& operator<<(std::ostream& os, const ImageFormat& format) { CASE(ImageFormat::B8G8R8A8_SRGB) CASE(ImageFormat::A8B8G8R8_UNORM_PACK32) CASE(ImageFormat::A8B8G8R8_SRGB_PACK32) + CASE(ImageFormat::Y8U8Y8V8_422_UNORM) + CASE(ImageFormat::U8Y8V8Y8_422_UNORM) + CASE(ImageFormat::Y8_U8V8_2PLANE_420_UNORM) + CASE(ImageFormat::Y8_U8V8_2PLANE_422_UNORM) + CASE(ImageFormat::Y8_U8_V8_3PLANE_420_UNORM) + CASE(ImageFormat::Y8_U8_V8_3PLANE_422_UNORM) + CASE(ImageFormat::Y16_U16V16_2PLANE_420_UNORM) + CASE(ImageFormat::Y16_U16V16_2PLANE_422_UNORM) + CASE(ImageFormat::Y16_U16_V16_3PLANE_420_UNORM) + CASE(ImageFormat::Y16_U16_V16_3PLANE_422_UNORM) default: os.setstate(std::ios_base::failbit); } @@ -136,13 +146,16 @@ std::ostream& operator<<(std::ostream& os, const ImageFormat& format) { class ImageLayer : public TestHeadless, - public testing::WithParamInterface> {}; + public testing::WithParamInterface> {}; TEST_P(ImageLayer, Image) { const Source source = std::get<0>(GetParam()); const bool reuse = std::get<1>(GetParam()) == Reuse::ENABLE; const UseLut use_lut = std::get<2>(GetParam()); const viz::ImageFormat image_format = std::get<3>(GetParam()); + const viz::YuvModelConversion yuv_model_conversion = std::get<4>(GetParam()); + const viz::YuvRange yuv_range = std::get<5>(GetParam()); if (use_lut == UseLut::ENABLE_WITH_NORMALIZE) { GTEST_SKIP() << "LUT with normalize tests not working yet, reference image generation needs to " @@ -151,6 +164,10 @@ TEST_P(ImageLayer, Image) { bool use_depth = false; bool convert_color = false; + bool is_yuv = false; + uintptr_t offset_plane_1 = 0; + uintptr_t offset_plane_2 = 0; + std::vector converted_data; switch (image_format) { case 
viz::ImageFormat::R8_UINT: @@ -183,6 +200,55 @@ TEST_P(ImageLayer, Image) { case viz::ImageFormat::A8B8G8R8_SRGB_PACK32: convert_color = true; break; + case viz::ImageFormat::Y8U8Y8V8_422_UNORM: + case viz::ImageFormat::U8Y8V8Y8_422_UNORM: + is_yuv = true; + converted_data.resize((width_ * height_ * 1 + (width_ / 2) * height_ * 2) * sizeof(uint8_t)); + break; + case viz::ImageFormat::Y8_U8V8_2PLANE_420_UNORM: + is_yuv = true; + offset_plane_1 = width_ * height_ * sizeof(uint8_t); + converted_data.resize(offset_plane_1 + ((width_ / 2) * (height_ / 2) * 2) * sizeof(uint8_t)); + break; + case viz::ImageFormat::Y8_U8V8_2PLANE_422_UNORM: + is_yuv = true; + offset_plane_1 = width_ * height_ * sizeof(uint8_t); + converted_data.resize(offset_plane_1 + ((width_ / 2) * height_ * 2) * sizeof(uint8_t)); + break; + case viz::ImageFormat::Y8_U8_V8_3PLANE_420_UNORM: + is_yuv = true; + offset_plane_1 = width_ * height_ * sizeof(uint8_t); + offset_plane_2 = offset_plane_1 + (width_ / 2) * (height_ / 2) * sizeof(uint8_t); + converted_data.resize(offset_plane_2 + ((width_ / 2) * (height_ / 2)) * sizeof(uint8_t)); + break; + case viz::ImageFormat::Y8_U8_V8_3PLANE_422_UNORM: + is_yuv = true; + offset_plane_1 = width_ * height_ * sizeof(uint8_t); + offset_plane_2 = offset_plane_1 + (width_ / 2) * height_ * sizeof(uint8_t); + converted_data.resize(offset_plane_2 + ((width_ / 2) * height_) * sizeof(uint8_t)); + break; + case viz::ImageFormat::Y16_U16V16_2PLANE_420_UNORM: + is_yuv = true; + offset_plane_1 = width_ * height_ * sizeof(uint16_t); + converted_data.resize(offset_plane_1 + ((width_ / 2) * (height_ / 2) * 2) * sizeof(uint16_t)); + break; + case viz::ImageFormat::Y16_U16V16_2PLANE_422_UNORM: + is_yuv = true; + offset_plane_1 = width_ * height_ * sizeof(uint16_t); + converted_data.resize(offset_plane_1 + ((width_ / 2) * height_ * 2) * sizeof(uint16_t)); + break; + case viz::ImageFormat::Y16_U16_V16_3PLANE_420_UNORM: + is_yuv = true; + offset_plane_1 = width_ * height_ * 
sizeof(uint16_t); + offset_plane_2 = offset_plane_1 + (width_ / 2) * (height_ / 2) * sizeof(uint16_t); + converted_data.resize(offset_plane_2 + ((width_ / 2) * (height_ / 2)) * sizeof(uint16_t)); + break; + case viz::ImageFormat::Y16_U16_V16_3PLANE_422_UNORM: + is_yuv = true; + offset_plane_1 = width_ * height_ * sizeof(uint16_t); + offset_plane_2 = offset_plane_1 + (width_ / 2) * height_ * sizeof(uint16_t); + converted_data.resize(offset_plane_2 + ((width_ / 2) * height_) * sizeof(uint16_t)); + break; case viz::ImageFormat::D16_UNORM: case viz::ImageFormat::X8_D24_UNORM: case viz::ImageFormat::D32_SFLOAT: @@ -196,11 +262,197 @@ TEST_P(ImageLayer, Image) { } viz::ImageFormat color_format, depth_format; + if (use_depth) { color_format = viz::ImageFormat::R8G8B8A8_UNORM; depth_format = image_format; SetupData(color_format); SetupData(depth_format); + } else if (is_yuv) { + // Skip test on iGPU, there is a Vulkan driver issue. The test fails on the first run only, the + // second run (within the same container) passes. The Vulkan driver has a shader cache, if the + // shader for the YUV format exists, the test passes, if the shader is not in the cache it + // fails. 
+ CUdevice device = 0; + ASSERT_EQ(cuDeviceGet(&device, 0), CUDA_SUCCESS); + int is_integrated = false; + ASSERT_EQ(cuDeviceGetAttribute(&is_integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, device), + CUDA_SUCCESS); + if (is_integrated) { GTEST_SKIP() << "YUV tests fail on integrated devices, test skipped"; } + + color_format = image_format; + + // create a smooth RGB pattern so we don't need to deal with linear chroma filtering when + // calculating the expected result + color_data_.resize(width_ * height_ * 3); + for (uint32_t y = 0; y < height_; ++y) { + for (uint32_t x = 0; x < width_; ++x) { + color_data_[y * (width_ * 3) + x * 3 + 0] = x; + color_data_[y * (width_ * 3) + x * 3 + 1] = y; + color_data_[y * (width_ * 3) + x * 3 + 2] = 255 - x; + } + } + // convert to YUV + for (uint32_t y = 0; y < height_; ++y) { + for (uint32_t x = 0; x < width_; ++x) { + // RGB -> YUV conversion + const float r = color_data_[y * (width_ * 3) + x * 3 + 0] / 255.f; + const float g = color_data_[y * (width_ * 3) + x * 3 + 1] / 255.f; + const float b = color_data_[y * (width_ * 3) + x * 3 + 2] / 255.f; + float Kr, Kg, Kb; + switch (yuv_model_conversion) { + case viz::YuvModelConversion::YUV_601: + Kr = 0.299f; + Kb = 0.114f; + break; + case viz::YuvModelConversion::YUV_709: + Kb = 0.0722f; + Kr = 0.2126f; + break; + case viz::YuvModelConversion::YUV_2020: + Kb = 0.0593f; + Kr = 0.2627f; + break; + default: + ASSERT_TRUE(false) << "Unhandled yuv model conversion"; + break; + } + // since Kr + Kg + Kb = 1.f, calculate Kg + Kg = 1.f - Kb - Kr; + + float luma = Kr * r + Kg * g + Kb * b; // 0 ... 1 + float u = (b - luma) / (1.f - Kb); // -1 ... 1 + float v = (r - luma) / (1.f - Kr); // -1 ... 
1 + + switch (yuv_range) { + case viz::YuvRange::ITU_FULL: + u = u * 0.5f + 0.5f; + v = v * 0.5f + 0.5f; + break; + case viz::YuvRange::ITU_NARROW: + luma = 16.f / 255.f + luma * (219.f / 255.f); + u = 128.f / 255.f + u * 0.5f * (224.f / 255.f); + v = 128.f / 255.f + v * 0.5f * (224.f / 255.f); + break; + default: + ASSERT_TRUE(false) << "Unhandled yuv range"; + break; + } + + switch (image_format) { + case viz::ImageFormat::Y8U8Y8V8_422_UNORM: + converted_data[y * (width_ * 2) + x * 2] = uint8_t(luma * 255.f + 0.5f); + if ((x & 1) == 0) { + converted_data[y * (width_ * 2) + (x * 2) + 1] = uint8_t(u * 255.f + 0.5f); + converted_data[y * (width_ * 2) + (x * 2) + 3] = uint8_t(v * 255.f + 0.5f); + } + break; + case viz::ImageFormat::U8Y8V8Y8_422_UNORM: + converted_data[y * (width_ * 2) + (x * 2) + 1] = uint8_t(luma * 255.f + 0.5f); + if ((x & 1) == 0) { + converted_data[y * (width_ * 2) + (x * 2) + 0] = uint8_t(u * 255.f + 0.5f); + converted_data[y * (width_ * 2) + (x * 2) + 2] = uint8_t(v * 255.f + 0.5f); + } + break; + case viz::ImageFormat::Y8_U8V8_2PLANE_420_UNORM: + converted_data[y * width_ + x] = uint8_t(luma * 255.f + 0.5f); + if (((x & 1) == 0) && ((y & 1) == 0)) { + converted_data[offset_plane_1 + ((y / 2) * (width_ / 2) + (x / 2)) * 2 + 0] = + uint8_t(u * 255.f + 0.5f); + converted_data[offset_plane_1 + ((y / 2) * (width_ / 2) + (x / 2)) * 2 + 1] = + uint8_t(v * 255.f + 0.5f); + } + break; + case viz::ImageFormat::Y8_U8V8_2PLANE_422_UNORM: + converted_data[y * width_ + x] = uint8_t(luma * 255.f + 0.5f); + if ((x & 1) == 0) { + converted_data[offset_plane_1 + (y * (width_ / 2) + (x / 2)) * 2 + 0] = + uint8_t(u * 255.f + 0.5f); + converted_data[offset_plane_1 + (y * (width_ / 2) + (x / 2)) * 2 + 1] = + uint8_t(v * 255.f + 0.5f); + } + break; + case viz::ImageFormat::Y8_U8_V8_3PLANE_420_UNORM: + converted_data[y * width_ + x] = uint8_t(luma * 255.f + 0.5f); + if (((x & 1) == 0) && ((y & 1) == 0)) { + converted_data[offset_plane_1 + (y / 2) * (width_ / 2) + (x 
/ 2)] = + uint8_t(u * 255.f + 0.5f); + converted_data[offset_plane_2 + (y / 2) * (width_ / 2) + (x / 2)] = + uint8_t(v * 255.f + 0.5f); + } + break; + case viz::ImageFormat::Y8_U8_V8_3PLANE_422_UNORM: + converted_data[y * width_ + x] = uint8_t(luma * 255.f + 0.5f); + if ((x & 1) == 0) { + converted_data[offset_plane_1 + y * (width_ / 2) + (x / 2)] = + uint8_t(u * 255.f + 0.5f); + converted_data[offset_plane_2 + y * (width_ / 2) + (x / 2)] = + uint8_t(v * 255.f + 0.5f); + } + break; + case viz::ImageFormat::Y16_U16V16_2PLANE_420_UNORM: + reinterpret_cast(converted_data.data())[y * width_ + x] = + uint16_t(luma * 65535.f + 0.5f); + if (((x & 1) == 0) && ((y & 1) == 0)) { + reinterpret_cast( + converted_data.data())[offset_plane_1 / sizeof(uint16_t) + + ((y / 2) * (width_ / 2) + (x / 2)) * 2 + 0] = + uint16_t(u * 65535.f + 0.5f); + reinterpret_cast( + converted_data.data())[offset_plane_1 / sizeof(uint16_t) + + ((y / 2) * (width_ / 2) + (x / 2)) * 2 + 1] = + uint16_t(v * 65535.f + 0.5f); + } + break; + case viz::ImageFormat::Y16_U16V16_2PLANE_422_UNORM: + reinterpret_cast(converted_data.data())[y * width_ + x] = + uint16_t(luma * 65535.f + 0.5f); + if ((x & 1) == 0) { + reinterpret_cast( + converted_data.data())[offset_plane_1 / sizeof(uint16_t) + + (y * (width_ / 2) + (x / 2)) * 2 + 0] = + uint16_t(u * 65535.f + 0.5f); + reinterpret_cast( + converted_data.data())[offset_plane_1 / sizeof(uint16_t) + + (y * (width_ / 2) + (x / 2)) * 2 + 1] = + uint16_t(v * 65535.f + 0.5f); + } + break; + case viz::ImageFormat::Y16_U16_V16_3PLANE_420_UNORM: + reinterpret_cast(converted_data.data())[y * width_ + x] = + uint16_t(luma * 65535.f + 0.5f); + if (((x & 1) == 0) && ((y & 1) == 0)) { + reinterpret_cast(converted_data.data())[offset_plane_1 / sizeof(uint16_t) + + (y / 2) * (width_ / 2) + (x / 2)] = + uint16_t(u * 65535.f + 0.5f); + reinterpret_cast(converted_data.data())[offset_plane_2 / sizeof(uint16_t) + + (y / 2) * (width_ / 2) + (x / 2)] = + uint16_t(v * 65535.f + 0.5f); + } 
+ break; + case viz::ImageFormat::Y16_U16_V16_3PLANE_422_UNORM: + reinterpret_cast(converted_data.data())[y * width_ + x] = + uint16_t(luma * 65535.f + 0.5f); + if ((x & 1) == 0) { + reinterpret_cast(converted_data.data())[offset_plane_1 / sizeof(uint16_t) + + y * (width_ / 2) + (x / 2)] = + uint16_t(u * 65535.f + 0.5f); + reinterpret_cast(converted_data.data())[offset_plane_2 / sizeof(uint16_t) + + y * (width_ / 2) + (x / 2)] = + uint16_t(v * 65535.f + 0.5f); + } + break; + default: + ASSERT_TRUE(false) << "Unhandled image format"; + break; + } + } + } + // use YUV as source and RGB as reference + std::swap(color_data_, converted_data); + + depth_format = viz::ImageFormat::D32_SFLOAT; + depth_data_ = std::vector(width_ * height_ * 1 * sizeof(float), 0.f); } else { color_format = image_format; depth_format = viz::ImageFormat::D32_SFLOAT; @@ -209,7 +461,6 @@ TEST_P(ImageLayer, Image) { } std::vector lut; - std::vector converted_data; viz::CudaService::ScopedPush cuda_context; viz::UniqueCUdeviceptr color_device_ptr; @@ -466,6 +717,11 @@ TEST_P(ImageLayer, Image) { EXPECT_NO_THROW(viz::BeginImageLayer()); + if (is_yuv) { + EXPECT_NO_THROW(viz::ImageYuvModelConversion(yuv_model_conversion)); + EXPECT_NO_THROW(viz::ImageYuvRange(yuv_range)); + } + if (use_lut != UseLut::DISABLE) { EXPECT_NO_THROW(viz::LUT(lut_size_, viz::ImageFormat::R8G8B8A8_UNORM, @@ -476,12 +732,28 @@ TEST_P(ImageLayer, Image) { switch (source) { case Source::HOST: - EXPECT_NO_THROW(viz::ImageHost( - width_, height_, color_format, reinterpret_cast(color_data_.data()))); + EXPECT_NO_THROW( + viz::ImageHost(width_, + height_, + color_format, + reinterpret_cast(color_data_.data()), + 0, + offset_plane_1 ? color_data_.data() + offset_plane_1 : nullptr, + 0, + offset_plane_2 ? 
color_data_.data() + offset_plane_2 : nullptr, + 0)); break; case Source::CUDA_DEVICE: EXPECT_NO_THROW( - viz::ImageCudaDevice(width_, height_, color_format, color_device_ptr.get())); + viz::ImageCudaDevice(width_, + height_, + color_format, + color_device_ptr.get(), + 0, + offset_plane_1 ? color_device_ptr.get() + offset_plane_1 : 0, + 0, + offset_plane_2 ? color_device_ptr.get() + offset_plane_2 : 0, + 0)); break; default: EXPECT_TRUE(false) << "Unhandled source type"; @@ -509,7 +781,8 @@ TEST_P(ImageLayer, Image) { if (converted_data.size() != 0) { std::swap(color_data_, converted_data); - CompareColorResult(); + // YUV data requires a higher absolute error because of conversion between color spaces + CompareColorResult(is_yuv ? 4 : 1); std::swap(converted_data, color_data_); } else { CompareColorResult(); @@ -523,27 +796,47 @@ INSTANTIATE_TEST_SUITE_P(ImageLayerSource, ImageLayer, testing::Combine(testing::Values(Source::HOST, Source::CUDA_DEVICE), testing::Values(Reuse::DISABLE, Reuse::ENABLE), testing::Values(UseLut::DISABLE), - testing::Values(viz::ImageFormat::R8G8B8A8_UNORM))); + testing::Values(viz::ImageFormat::R8G8B8A8_UNORM), + testing::Values(viz::YuvModelConversion::YUV_601), + testing::Values(viz::YuvRange::ITU_FULL))); -// native color formats +// native RGB color formats INSTANTIATE_TEST_SUITE_P( ImageLayerFormat, ImageLayer, - testing::Combine(testing::Values(Source::CUDA_DEVICE), testing::Values(Reuse::DISABLE), - testing::Values(UseLut::DISABLE), - testing::Values(viz::ImageFormat::R8_UNORM, viz::ImageFormat::R8_SNORM, - viz::ImageFormat::R8_SRGB, viz::ImageFormat::R16_UNORM, - viz::ImageFormat::R16_SNORM, viz::ImageFormat::R32_SFLOAT, - viz::ImageFormat::R8G8B8A8_UNORM, - viz::ImageFormat::R8G8B8A8_SNORM, - viz::ImageFormat::R8G8B8A8_SRGB, - viz::ImageFormat::R16G16B16A16_SNORM, - viz::ImageFormat::R16G16B16A16_UNORM, - viz::ImageFormat::A2B10G10R10_UNORM_PACK32, - viz::ImageFormat::A2R10G10B10_UNORM_PACK32, - viz::ImageFormat::B8G8R8A8_UNORM, 
- viz::ImageFormat::B8G8R8A8_SRGB, - viz::ImageFormat::A8B8G8R8_UNORM_PACK32, - viz::ImageFormat::A8B8G8R8_SRGB_PACK32))); + testing::Combine( + testing::Values(Source::CUDA_DEVICE, Source::HOST), testing::Values(Reuse::DISABLE), + testing::Values(UseLut::DISABLE), + testing::Values( + viz::ImageFormat::R8_UNORM, viz::ImageFormat::R8_SNORM, viz::ImageFormat::R8_SRGB, + viz::ImageFormat::R16_UNORM, viz::ImageFormat::R16_SNORM, viz::ImageFormat::R32_SFLOAT, + viz::ImageFormat::R8G8B8A8_UNORM, viz::ImageFormat::R8G8B8A8_SNORM, + viz::ImageFormat::R8G8B8A8_SRGB, viz::ImageFormat::R16G16B16A16_SNORM, + viz::ImageFormat::R16G16B16A16_UNORM, viz::ImageFormat::A2B10G10R10_UNORM_PACK32, + viz::ImageFormat::A2R10G10B10_UNORM_PACK32, viz::ImageFormat::B8G8R8A8_UNORM, + viz::ImageFormat::B8G8R8A8_SRGB, viz::ImageFormat::A8B8G8R8_UNORM_PACK32, + viz::ImageFormat::A8B8G8R8_SRGB_PACK32), + testing::Values(viz::YuvModelConversion::YUV_601), + testing::Values(viz::YuvRange::ITU_FULL))); + +// native YUV color formats +INSTANTIATE_TEST_SUITE_P( + ImageLayerFormatYUV, ImageLayer, + testing::Combine(testing::Values(Source::CUDA_DEVICE, Source::HOST), + testing::Values(Reuse::DISABLE), testing::Values(UseLut::DISABLE), + testing::Values(viz::ImageFormat::Y8U8Y8V8_422_UNORM, + viz::ImageFormat::U8Y8V8Y8_422_UNORM, + viz::ImageFormat::Y8_U8V8_2PLANE_420_UNORM, + viz::ImageFormat::Y8_U8V8_2PLANE_422_UNORM, + viz::ImageFormat::Y8_U8_V8_3PLANE_420_UNORM, + viz::ImageFormat::Y8_U8_V8_3PLANE_422_UNORM, + viz::ImageFormat::Y16_U16V16_2PLANE_420_UNORM, + viz::ImageFormat::Y16_U16V16_2PLANE_422_UNORM, + viz::ImageFormat::Y16_U16_V16_3PLANE_420_UNORM, + viz::ImageFormat::Y16_U16_V16_3PLANE_422_UNORM), + testing::Values(viz::YuvModelConversion::YUV_601, + viz::YuvModelConversion::YUV_709, + viz::YuvModelConversion::YUV_2020), + testing::Values(viz::YuvRange::ITU_FULL, viz::YuvRange::ITU_NARROW))); // LUT tests INSTANTIATE_TEST_SUITE_P( @@ -552,7 +845,9 @@ INSTANTIATE_TEST_SUITE_P( 
testing::Values(UseLut::ENABLE), testing::Values(viz::ImageFormat::R8_UINT, viz::ImageFormat::R8_SINT, viz::ImageFormat::R16_UINT, viz::ImageFormat::R16_SINT, - viz::ImageFormat::R32_UINT, viz::ImageFormat::R32_SINT))); + viz::ImageFormat::R32_UINT, viz::ImageFormat::R32_SINT), + testing::Values(viz::YuvModelConversion::YUV_601), + testing::Values(viz::YuvRange::ITU_FULL))); // LUT with normalize tests INSTANTIATE_TEST_SUITE_P( @@ -564,7 +859,9 @@ INSTANTIATE_TEST_SUITE_P( viz::ImageFormat::R16_UINT, viz::ImageFormat::R16_SINT, viz::ImageFormat::R16_UNORM, viz::ImageFormat::R16_SNORM, viz::ImageFormat::R32_UINT, viz::ImageFormat::R32_SINT, - viz::ImageFormat::R16_SFLOAT, viz::ImageFormat::R32_SFLOAT))); + viz::ImageFormat::R16_SFLOAT, viz::ImageFormat::R32_SFLOAT), + testing::Values(viz::YuvModelConversion::YUV_601), + testing::Values(viz::YuvRange::ITU_FULL))); // RGB is non-native, converted by CUDA kernel or host code INSTANTIATE_TEST_SUITE_P(ImageLayerConvert, ImageLayer, @@ -573,14 +870,18 @@ INSTANTIATE_TEST_SUITE_P(ImageLayerConvert, ImageLayer, testing::Values(UseLut::DISABLE), testing::Values(viz::ImageFormat::R8G8B8_UNORM, viz::ImageFormat::R8G8B8_SNORM, - viz::ImageFormat::R8G8B8_SRGB))); + viz::ImageFormat::R8G8B8_SRGB), + testing::Values(viz::YuvModelConversion::YUV_601), + testing::Values(viz::YuvRange::ITU_FULL))); // depth format tests INSTANTIATE_TEST_SUITE_P(ImageLayerDepth, ImageLayer, testing::Combine(testing::Values(Source::HOST, Source::CUDA_DEVICE), testing::Values(Reuse::DISABLE, Reuse::ENABLE), testing::Values(UseLut::DISABLE), - testing::Values(viz::ImageFormat::D32_SFLOAT))); + testing::Values(viz::ImageFormat::D32_SFLOAT), + testing::Values(viz::YuvModelConversion::YUV_601), + testing::Values(viz::YuvRange::ITU_FULL))); TEST_F(ImageLayer, ImageCudaArray) { constexpr viz::ImageFormat kFormat = viz::ImageFormat::R8G8B8A8_UNORM; @@ -693,6 +994,11 @@ TEST_F(ImageLayer, Errors) { viz::ComponentSwizzle::IDENTITY, 
viz::ComponentSwizzle::IDENTITY), std::runtime_error); + EXPECT_THROW(viz::ImageYuvModelConversion(viz::YuvModelConversion::YUV_601), std::runtime_error); + EXPECT_THROW(viz::ImageYuvRange(viz::YuvRange::ITU_FULL), std::runtime_error); + EXPECT_THROW(viz::ImageChromaLocation(viz::ChromaLocation::COSITED_EVEN, + viz::ChromaLocation::COSITED_EVEN), + std::runtime_error); // it's an error to call BeginImageLayer again without calling EndLayer EXPECT_NO_THROW(viz::BeginImageLayer()); @@ -713,6 +1019,11 @@ TEST_F(ImageLayer, Errors) { viz::ComponentSwizzle::IDENTITY, viz::ComponentSwizzle::IDENTITY), std::runtime_error); + EXPECT_THROW(viz::ImageYuvModelConversion(viz::YuvModelConversion::YUV_601), std::runtime_error); + EXPECT_THROW(viz::ImageYuvRange(viz::YuvRange::ITU_FULL), std::runtime_error); + EXPECT_THROW(viz::ImageChromaLocation(viz::ChromaLocation::COSITED_EVEN, + viz::ChromaLocation::COSITED_EVEN), + std::runtime_error); EXPECT_NO_THROW(viz::EndLayer()); EXPECT_NO_THROW(viz::End()); diff --git a/modules/holoviz/tests/functional/test_fixture.cpp b/modules/holoviz/tests/functional/test_fixture.cpp index 32131c9..8e21559 100644 --- a/modules/holoviz/tests/functional/test_fixture.cpp +++ b/modules/holoviz/tests/functional/test_fixture.cpp @@ -86,6 +86,7 @@ void TestBase::SetupData(viz::ImageFormat format, uint32_t rand_seed) { uint32_t channels; uint32_t component_size; + uint32_t elements; switch (format) { case viz::ImageFormat::R8_UINT: case viz::ImageFormat::R8_SINT: @@ -276,7 +277,7 @@ static std::string BuildFileName(const std::string& end) { return file_name; } -bool TestBase::CompareColorResult() { +bool TestBase::CompareColorResult(uint8_t absolute_error) { const uint32_t components = color_data_.size() / (width_ * height_); if ((components != 1) && (components != 3) && (components != 4)) { EXPECT_TRUE(false) << "Can compare R8_UNORM, R8G8B8_UNORM or R8G8B8A8_UNORM data only"; @@ -290,7 +291,7 @@ bool TestBase::CompareColorResult() { bool different = 
false; for (uint32_t component = 0; component < components; ++component) { different |= std::abs(color_data_[index * components + component] - - color_data[index * 4 + component]) > 1; + color_data[index * 4 + component]) > absolute_error; } if (different) { const std::string ref_file_name = BuildFileName("color_ref"); diff --git a/modules/holoviz/tests/functional/test_fixture.hpp b/modules/holoviz/tests/functional/test_fixture.hpp index 564f887..e2aba35 100644 --- a/modules/holoviz/tests/functional/test_fixture.hpp +++ b/modules/holoviz/tests/functional/test_fixture.hpp @@ -86,9 +86,11 @@ class TestBase : public ::testing::Test { /** * Read back color data and compare with the data generated with SetupData(). * + * @param absolute_error allowed absolute error + * * @return false if read back and generated data do not match */ - bool CompareColorResult(); + bool CompareColorResult(uint8_t absolute_error = 1); /** * Read back depth data and compare with the data generated with SetupData(). diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/buffersuballocator_vk.cpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/buffersuballocator_vk.cpp index 13979f5..0dfdbdf 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/buffersuballocator_vk.cpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/buffersuballocator_vk.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * -* SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +* SPDX-FileCopyrightText: Copyright (c) 2014-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -302,7 +302,7 @@ VkResult BufferSubAllocator::allocBlock(Block& block, uint32_t index, VkDeviceSi MemAllocateInfo memAllocateInfo(memReqs.memoryRequirements, m_memoryPropFlags, false); memAllocateInfo.setDebugName(debugName); - MemHandle memory = m_memAllocator->allocMemory(memAllocateInfo, &result); + MemHandle memory = m_memAllocator->allocMemory(memAllocateInfo, 0, &result); if(result != VK_SUCCESS) { assert(0 && "could not allocate buffer\n"); diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/descriptorsets_vk.hpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/descriptorsets_vk.hpp index 21fd499..0d32a5c 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/descriptorsets_vk.hpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/descriptorsets_vk.hpp @@ -13,43 +13,38 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ - #pragma once #include #include -#include #include +#include namespace nvvk { - // utility for additional feature support -enum class DescriptorSupport : uint32_t -{ - CORE_1_0 = 0, // VK Version 1.0 - CORE_1_2 = 1, // VK Version 1.2 (adds descriptor_indexing) +enum class DescriptorSupport : uint32_t { + CORE_1_0 = 0, // VK Version 1.0 + CORE_1_2 = 1, // VK Version 1.2 (adds descriptor_indexing) INDEXING_EXT = 2, // VK_EXT_descriptor_indexing }; using DescriptorSupport_t = std::underlying_type_t; -inline DescriptorSupport operator|(DescriptorSupport lhs, DescriptorSupport rhs) -{ - return static_cast(static_cast(lhs) | static_cast(rhs)); +inline DescriptorSupport operator|(DescriptorSupport lhs, DescriptorSupport rhs) { + return static_cast(static_cast(lhs) | + static_cast(rhs)); } -inline DescriptorSupport operator&(DescriptorSupport lhs, DescriptorSupport rhs) -{ - return static_cast(static_cast(lhs) & static_cast(rhs)); +inline DescriptorSupport operator&(DescriptorSupport lhs, DescriptorSupport rhs) { + return static_cast(static_cast(lhs) & + static_cast(rhs)); } -inline bool isSet(DescriptorSupport test, DescriptorSupport query) -{ +inline bool isSet(DescriptorSupport test, DescriptorSupport query) { return (test & query) == query; } -inline bool isAnySet(DescriptorSupport test, DescriptorSupport query) -{ +inline bool isAnySet(DescriptorSupport test, DescriptorSupport query) { return (test & query) != DescriptorSupport::CORE_1_0; } @@ -62,17 +57,18 @@ inline bool isAnySet(DescriptorSupport test, DescriptorSupport query) */ -inline VkDescriptorPool createDescriptorPool(VkDevice device, size_t poolSizeCount, const VkDescriptorPoolSize* poolSizes, uint32_t maxSets) -{ +inline VkDescriptorPool createDescriptorPool(VkDevice device, size_t poolSizeCount, + const VkDescriptorPoolSize* poolSizes, + uint32_t maxSets) { VkResult result; - VkDescriptorPool descrPool; + VkDescriptorPool descrPool; VkDescriptorPoolCreateInfo 
descrPoolInfo = {}; - descrPoolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - descrPoolInfo.pNext = nullptr; - descrPoolInfo.maxSets = maxSets; - descrPoolInfo.poolSizeCount = uint32_t(poolSizeCount); - descrPoolInfo.pPoolSizes = poolSizes; + descrPoolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + descrPoolInfo.pNext = nullptr; + descrPoolInfo.maxSets = maxSets; + descrPoolInfo.poolSizeCount = uint32_t(poolSizeCount); + descrPoolInfo.pPoolSizes = poolSizes; // scene pool result = vkCreateDescriptorPool(device, &descrPoolInfo, nullptr, &descrPool); @@ -80,25 +76,30 @@ inline VkDescriptorPool createDescriptorPool(VkDevice device, size_t poolSizeCou return descrPool; } -inline VkDescriptorPool createDescriptorPool(VkDevice device, const std::vector& poolSizes, uint32_t maxSets) -{ +inline VkDescriptorPool createDescriptorPool(VkDevice device, + const std::vector& poolSizes, + uint32_t maxSets) { return createDescriptorPool(device, poolSizes.size(), poolSizes.data(), maxSets); } #ifdef VULKAN_HPP -inline VkDescriptorPool createDescriptorPool(vk::Device device, const std::vector& poolSizes, uint32_t maxSets) -{ - return createDescriptorPool(device, poolSizes.size(), reinterpret_cast(poolSizes.data()), maxSets); +inline VkDescriptorPool createDescriptorPool(vk::Device device, + const std::vector& poolSizes, + uint32_t maxSets) { + return createDescriptorPool(device, + poolSizes.size(), + reinterpret_cast(poolSizes.data()), + maxSets); } #endif -inline VkDescriptorSet allocateDescriptorSet(VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout) -{ - VkResult result; +inline VkDescriptorSet allocateDescriptorSet(VkDevice device, VkDescriptorPool pool, + VkDescriptorSetLayout layout) { + VkResult result; VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; - allocInfo.descriptorPool = pool; - allocInfo.descriptorSetCount = 1; - allocInfo.pSetLayouts = &layout; + allocInfo.descriptorPool = pool; 
+ allocInfo.descriptorSetCount = 1; + allocInfo.pSetLayouts = &layout; VkDescriptorSet set; result = vkAllocateDescriptorSets(device, &allocInfo, &set); @@ -106,32 +107,27 @@ inline VkDescriptorSet allocateDescriptorSet(VkDevice device, VkDescriptorPool p return set; } -inline void allocateDescriptorSets(VkDevice device, - VkDescriptorPool pool, - VkDescriptorSetLayout layout, - uint32_t count, - std::vector& sets) -{ +inline void allocateDescriptorSets(VkDevice device, VkDescriptorPool pool, + VkDescriptorSetLayout layout, uint32_t count, + std::vector& sets) { sets.resize(count); std::vector layouts(count, layout); - VkResult result; + VkResult result; VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; - allocInfo.descriptorPool = pool; - allocInfo.descriptorSetCount = count; - allocInfo.pSetLayouts = layouts.data(); + allocInfo.descriptorPool = pool; + allocInfo.descriptorSetCount = count; + allocInfo.pSetLayouts = layouts.data(); result = vkAllocateDescriptorSets(device, &allocInfo, sets.data()); assert(result == VK_SUCCESS); } #ifdef VULKAN_HPP -inline void allocateDescriptorSets(vk::Device device, - vk::DescriptorPool pool, - vk::DescriptorSetLayout layout, - uint32_t count, - std::vector& sets) -{ - allocateDescriptorSets(device, pool, layout, count, reinterpret_cast&>(sets)); +inline void allocateDescriptorSets(vk::Device device, vk::DescriptorPool pool, + vk::DescriptorSetLayout layout, uint32_t count, + std::vector& sets) { + allocateDescriptorSets( + device, pool, layout, count, reinterpret_cast&>(sets)); } #endif @@ -139,11 +135,11 @@ inline void allocateDescriptorSets(vk::Device device, /** \class nvvk::DescriptorSetBindings - nvvk::DescriptorSetBindings is a helper class that keeps a vector of `VkDescriptorSetLayoutBinding` for a single - `VkDescriptorSetLayout`. 
Provides helper functions to create `VkDescriptorSetLayout` - as well as `VkDescriptorPool` based on this information, as well as utilities - to fill the `VkWriteDescriptorSet` structure with binding information stored - within the class. + nvvk::DescriptorSetBindings is a helper class that keeps a vector of + `VkDescriptorSetLayoutBinding` for a single `VkDescriptorSetLayout`. Provides helper functions to + create `VkDescriptorSetLayout` as well as `VkDescriptorPool` based on this information, as well as + utilities to fill the `VkWriteDescriptorSet` structure with binding information stored within the + class. The class comes with the convenience functionality that when you make a VkWriteDescriptorSet you provide the binding slot, rather than the @@ -184,237 +180,251 @@ inline void allocateDescriptorSets(vk::Device device, \endcode */ -class DescriptorSetBindings -{ -public: +class DescriptorSetBindings { + public: DescriptorSetBindings() = default; DescriptorSetBindings(const std::vector& bindings) - : m_bindings(bindings) - { - } + : m_bindings(bindings) {} // Add a binding to the descriptor set - void addBinding(uint32_t binding, // Slot to which the descriptor will be bound, corresponding to the layout - // binding index in the shader - VkDescriptorType type, // Type of the bound descriptor(s) - uint32_t count, // Number of descriptors - VkShaderStageFlags stageFlags, // Shader stages at which the bound resources will be available - const VkSampler* pImmutableSampler = nullptr // Corresponding sampler, in case of textures - ) - { + void addBinding( + uint32_t binding, // Slot to which the descriptor will be bound, corresponding to the layout + // binding index in the shader + VkDescriptorType type, // Type of the bound descriptor(s) + uint32_t count, // Number of descriptors + VkShaderStageFlags + stageFlags, // Shader stages at which the bound resources will be available + const VkSampler* pImmutableSampler = nullptr // Corresponding sampler, in case of 
textures + ) { m_bindings.push_back({binding, type, count, stageFlags, pImmutableSampler}); } - void addBinding(const VkDescriptorSetLayoutBinding& layoutBinding) { m_bindings.emplace_back(layoutBinding); } + void addBinding(const VkDescriptorSetLayoutBinding& layoutBinding) { + m_bindings.emplace_back(layoutBinding); + } - void setBindings(const std::vector& bindings) { m_bindings = bindings; } + void setBindings(const std::vector& bindings) { + m_bindings = bindings; + } // requires use of SUPPORT_INDEXING_EXT/SUPPORT_INDEXING_V1_2 on createLayout void setBindingFlags(uint32_t binding, VkDescriptorBindingFlags bindingFlags); - void clear() - { + void clear() { m_bindings.clear(); m_bindingFlags.clear(); } - bool empty() const { return m_bindings.empty(); } - size_t size() const { return m_bindings.size(); } + bool empty() const { return m_bindings.empty(); } + size_t size() const { return m_bindings.size(); } const VkDescriptorSetLayoutBinding* data() const { return m_bindings.data(); } + VkDescriptorSetLayoutBinding* data() { return m_bindings.data(); } VkDescriptorType getType(uint32_t binding) const; - uint32_t getCount(uint32_t binding) const; - + uint32_t getCount(uint32_t binding) const; // Once the bindings have been added, this generates the descriptor layout corresponding to the // bound resources. 
- VkDescriptorSetLayout createLayout(VkDevice device, - VkDescriptorSetLayoutCreateFlags flags = 0, - DescriptorSupport supportFlags = DescriptorSupport::CORE_1_0) const; + VkDescriptorSetLayout createLayout( + VkDevice device, VkDescriptorSetLayoutCreateFlags flags = 0, + DescriptorSupport supportFlags = DescriptorSupport::CORE_1_0) const; // Once the bindings have been added, this generates the descriptor pool with enough space to // handle all the bound resources and allocate up to maxSets descriptor sets - VkDescriptorPool createPool(VkDevice device, uint32_t maxSets = 1, VkDescriptorPoolCreateFlags flags = 0) const; + VkDescriptorPool createPool(VkDevice device, uint32_t maxSets = 1, + VkDescriptorPoolCreateFlags flags = 0) const; // appends the required poolsizes for N sets void addRequiredPoolSizes(std::vector& poolSizes, uint32_t numSets) const; // provide single element - VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, uint32_t arrayElement = 0) const; - VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, - uint32_t dstBinding, + VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, + uint32_t arrayElement = 0) const; + VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, const VkDescriptorImageInfo* pImageInfo, - uint32_t arrayElement = 0) const; - VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, - uint32_t dstBinding, + uint32_t arrayElement = 0) const; + VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, const VkDescriptorBufferInfo* pBufferInfo, - uint32_t arrayElement = 0) const; - VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, - uint32_t dstBinding, + uint32_t arrayElement = 0) const; + VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, const VkBufferView* pTexelBufferView, - uint32_t arrayElement = 0) const; + uint32_t arrayElement = 0) const; #if VK_NV_ray_tracing - VkWriteDescriptorSet 
makeWrite(VkDescriptorSet dstSet, - uint32_t dstBinding, + VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, const VkWriteDescriptorSetAccelerationStructureNV* pAccel, - uint32_t arrayElement = 0) const; + uint32_t arrayElement = 0) const; #endif #if VK_KHR_acceleration_structure - VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, - uint32_t dstBinding, + VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, const VkWriteDescriptorSetAccelerationStructureKHR* pAccel, - uint32_t arrayElement = 0) const; + uint32_t arrayElement = 0) const; #endif #if VK_EXT_inline_uniform_block - VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, - uint32_t dstBinding, + VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, const VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniform, - uint32_t arrayElement = 0) const; + uint32_t arrayElement = 0) const; #endif // provide full array VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding) const; - VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, const VkDescriptorImageInfo* pImageInfo) const; - VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, const VkDescriptorBufferInfo* pBufferInfo) const; - VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, const VkBufferView* pTexelBufferView) const; + VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, + const VkDescriptorImageInfo* pImageInfo) const; + VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, + const VkDescriptorBufferInfo* pBufferInfo) const; + VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, + const VkBufferView* pTexelBufferView) const; #if VK_NV_ray_tracing - VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, - uint32_t dstBinding, - const VkWriteDescriptorSetAccelerationStructureNV* 
pAccel) const; + VkWriteDescriptorSet makeWriteArray( + VkDescriptorSet dstSet, uint32_t dstBinding, + const VkWriteDescriptorSetAccelerationStructureNV* pAccel) const; #endif #if VK_KHR_acceleration_structure - VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, - uint32_t dstBinding, - const VkWriteDescriptorSetAccelerationStructureKHR* pAccel) const; + VkWriteDescriptorSet makeWriteArray( + VkDescriptorSet dstSet, uint32_t dstBinding, + const VkWriteDescriptorSetAccelerationStructureKHR* pAccel) const; #endif #if VK_EXT_inline_uniform_block - VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, - uint32_t dstBinding, - const VkWriteDescriptorSetInlineUniformBlockEXT* pInline) const; + VkWriteDescriptorSet makeWriteArray( + VkDescriptorSet dstSet, uint32_t dstBinding, + const VkWriteDescriptorSetInlineUniformBlockEXT* pInline) const; #endif #ifdef VULKAN_HPP - DescriptorSetBindings(const std::vector& bindings) - { + DescriptorSetBindings(const std::vector& bindings) { auto source = &static_cast(bindings[0]); m_bindings.assign(source, source + bindings.size()); } - void addBinding(uint32_t binding, // Slot to which the descriptor will be bound, corresponding to the layout - // binding index in the shader - vk::DescriptorType type, // Type of the bound descriptor(s) - uint32_t count, // Number of descriptors - vk::ShaderStageFlags stageFlags, // Shader stages at which the bound resources will be available - const vk::Sampler* pImmutableSampler = nullptr // Corresponding sampler, in case of textures - ) - { - m_bindings.push_back({binding, static_cast(type), count, static_cast(stageFlags), + void addBinding( + uint32_t binding, // Slot to which the descriptor will be bound, corresponding to the layout + // binding index in the shader + vk::DescriptorType type, // Type of the bound descriptor(s) + uint32_t count, // Number of descriptors + vk::ShaderStageFlags + stageFlags, // Shader stages at which the bound resources will be available + const 
vk::Sampler* pImmutableSampler = nullptr // Corresponding sampler, in case of textures + ) { + m_bindings.push_back({binding, + static_cast(type), + count, + static_cast(stageFlags), reinterpret_cast(pImmutableSampler)}); } - void addBinding(const vk::DescriptorSetLayoutBinding& layoutBinding) - { + void addBinding(const vk::DescriptorSetLayoutBinding& layoutBinding) { m_bindings.emplace_back(static_cast(layoutBinding)); } - void setBindings(const std::vector& bindings) - { + void setBindings(const std::vector& bindings) { setBindings(reinterpret_cast&>(bindings)); } - void setBindingFlags(uint32_t binding, vk::DescriptorBindingFlags bindingFlags) - { + void setBindingFlags(uint32_t binding, vk::DescriptorBindingFlags bindingFlags) { setBindingFlags(binding, static_cast(bindingFlags)); } - void addRequiredPoolSizes(std::vector& poolSizes, uint32_t numSets) const - { + void addRequiredPoolSizes(std::vector& poolSizes, + uint32_t numSets) const { addRequiredPoolSizes(reinterpret_cast&>(poolSizes), numSets); } - vk::WriteDescriptorSet makeWrite(vk::DescriptorSet dstSet, - uint32_t dstBinding, + vk::WriteDescriptorSet makeWrite(vk::DescriptorSet dstSet, uint32_t dstBinding, const vk::DescriptorImageInfo* pImageInfo, - uint32_t arrayElement = 0) const - { - return makeWrite(dstSet, dstBinding, reinterpret_cast(pImageInfo), arrayElement); + uint32_t arrayElement = 0) const { + return makeWrite(dstSet, + dstBinding, + reinterpret_cast(pImageInfo), + arrayElement); } - vk::WriteDescriptorSet makeWrite(vk::DescriptorSet dstSet, - uint32_t dstBinding, + vk::WriteDescriptorSet makeWrite(vk::DescriptorSet dstSet, uint32_t dstBinding, const vk::DescriptorBufferInfo* pBufferInfo, - uint32_t arrayElement = 0) const - { - return makeWrite(dstSet, dstBinding, reinterpret_cast(pBufferInfo), arrayElement); + uint32_t arrayElement = 0) const { + return makeWrite(dstSet, + dstBinding, + reinterpret_cast(pBufferInfo), + arrayElement); } - vk::WriteDescriptorSet 
makeWrite(vk::DescriptorSet dstSet, - uint32_t dstBinding, + vk::WriteDescriptorSet makeWrite(vk::DescriptorSet dstSet, uint32_t dstBinding, const vk::BufferView* pTexelBufferView, - uint32_t arrayElement = 0) const - { - return makeWrite(dstSet, dstBinding, reinterpret_cast(pTexelBufferView), arrayElement); + uint32_t arrayElement = 0) const { + return makeWrite( + dstSet, dstBinding, reinterpret_cast(pTexelBufferView), arrayElement); } #if VK_NV_ray_tracing - vk::WriteDescriptorSet makeWrite(vk::DescriptorSet dstSet, - uint32_t dstBinding, + vk::WriteDescriptorSet makeWrite(vk::DescriptorSet dstSet, uint32_t dstBinding, const vk::WriteDescriptorSetAccelerationStructureNV* pAccel, - uint32_t arrayElement = 0) const - { - return makeWrite(dstSet, dstBinding, reinterpret_cast(pAccel), arrayElement); + uint32_t arrayElement = 0) const { + return makeWrite(dstSet, + dstBinding, + reinterpret_cast(pAccel), + arrayElement); } #endif #if VK_KHR_acceleration_structure - vk::WriteDescriptorSet makeWrite(vk::DescriptorSet dstSet, - uint32_t dstBinding, + vk::WriteDescriptorSet makeWrite(vk::DescriptorSet dstSet, uint32_t dstBinding, const vk::WriteDescriptorSetAccelerationStructureKHR* pAccel, - uint32_t arrayElement = 0) const - { - return makeWrite(dstSet, dstBinding, reinterpret_cast(pAccel), arrayElement); + uint32_t arrayElement = 0) const { + return makeWrite(dstSet, + dstBinding, + reinterpret_cast(pAccel), + arrayElement); } #endif #if VK_EXT_inline_uniform_block - vk::WriteDescriptorSet makeWrite(vk::DescriptorSet dstSet, - uint32_t dstBinding, - const vk::WriteDescriptorSetInlineUniformBlockEXT* pInlineUniform, - uint32_t arrayElement = 0) const - { - return makeWrite(dstSet, dstBinding, - reinterpret_cast(pInlineUniform), arrayElement); + vk::WriteDescriptorSet makeWrite( + vk::DescriptorSet dstSet, uint32_t dstBinding, + const vk::WriteDescriptorSetInlineUniformBlockEXT* pInlineUniform, + uint32_t arrayElement = 0) const { + return makeWrite( + dstSet, + 
dstBinding, + reinterpret_cast(pInlineUniform), + arrayElement); } #endif - vk::WriteDescriptorSet makeWriteArray(vk::DescriptorSet dstSet, uint32_t dstBinding, const vk::DescriptorImageInfo* pImageInfo) const - { - return makeWriteArray(dstSet, dstBinding, reinterpret_cast(pImageInfo)); + vk::WriteDescriptorSet makeWriteArray(vk::DescriptorSet dstSet, uint32_t dstBinding, + const vk::DescriptorImageInfo* pImageInfo) const { + return makeWriteArray( + dstSet, dstBinding, reinterpret_cast(pImageInfo)); } - vk::WriteDescriptorSet makeWriteArray(vk::DescriptorSet dstSet, uint32_t dstBinding, const vk::DescriptorBufferInfo* pBufferInfo) const - { - return makeWriteArray(dstSet, dstBinding, reinterpret_cast(pBufferInfo)); + vk::WriteDescriptorSet makeWriteArray(vk::DescriptorSet dstSet, uint32_t dstBinding, + const vk::DescriptorBufferInfo* pBufferInfo) const { + return makeWriteArray( + dstSet, dstBinding, reinterpret_cast(pBufferInfo)); } - vk::WriteDescriptorSet makeWriteArray(vk::DescriptorSet dstSet, uint32_t dstBinding, const vk::BufferView* pTexelBufferView) const - { - return makeWriteArray(dstSet, dstBinding, reinterpret_cast(pTexelBufferView)); + vk::WriteDescriptorSet makeWriteArray(vk::DescriptorSet dstSet, uint32_t dstBinding, + const vk::BufferView* pTexelBufferView) const { + return makeWriteArray( + dstSet, dstBinding, reinterpret_cast(pTexelBufferView)); } #if VK_NV_ray_tracing - vk::WriteDescriptorSet makeWriteArray(vk::DescriptorSet dstSet, - uint32_t dstBinding, - const vk::WriteDescriptorSetAccelerationStructureNV* pAccel) const - { - return makeWriteArray(dstSet, dstBinding, reinterpret_cast(pAccel)); + vk::WriteDescriptorSet makeWriteArray( + vk::DescriptorSet dstSet, uint32_t dstBinding, + const vk::WriteDescriptorSetAccelerationStructureNV* pAccel) const { + return makeWriteArray( + dstSet, + dstBinding, + reinterpret_cast(pAccel)); } #endif #if VK_KHR_acceleration_structure - vk::WriteDescriptorSet makeWriteArray(vk::DescriptorSet dstSet, - 
uint32_t dstBinding, - const vk::WriteDescriptorSetAccelerationStructureKHR* pAccel) const - { - return makeWriteArray(dstSet, dstBinding, reinterpret_cast(pAccel)); + vk::WriteDescriptorSet makeWriteArray( + vk::DescriptorSet dstSet, uint32_t dstBinding, + const vk::WriteDescriptorSetAccelerationStructureKHR* pAccel) const { + return makeWriteArray( + dstSet, + dstBinding, + reinterpret_cast(pAccel)); } #endif #if VK_EXT_inline_uniform_block - vk::WriteDescriptorSet makeWriteArray(vk::DescriptorSet dstSet, - uint32_t dstBinding, - const vk::WriteDescriptorSetInlineUniformBlockEXT* pInline) const - { - return makeWriteArray(dstSet, dstBinding, reinterpret_cast(pInline)); + vk::WriteDescriptorSet makeWriteArray( + vk::DescriptorSet dstSet, uint32_t dstBinding, + const vk::WriteDescriptorSetInlineUniformBlockEXT* pInline) const { + return makeWriteArray( + dstSet, + dstBinding, + reinterpret_cast(pInline)); } #endif #endif -private: + private: std::vector m_bindings; - std::vector m_bindingFlags; + std::vector m_bindingFlags; }; ///////////////////////////////////////////////////////////// @@ -452,9 +462,8 @@ VkDescripterSetLayout. 
\endcode */ -class DescriptorSetContainer -{ -public: +class DescriptorSetContainer { + public: DescriptorSetContainer(DescriptorSetContainer const&) = delete; DescriptorSetContainer& operator=(DescriptorSetContainer const&) = delete; @@ -466,25 +475,22 @@ class DescriptorSetContainer void setBindings(const std::vector& bindings); void addBinding(VkDescriptorSetLayoutBinding layoutBinding); - void addBinding(uint32_t binding, - VkDescriptorType descriptorType, - uint32_t descriptorCount, - VkShaderStageFlags stageFlags, - const VkSampler* pImmutableSamplers = nullptr); + void addBinding(uint32_t binding, VkDescriptorType descriptorType, uint32_t descriptorCount, + VkShaderStageFlags stageFlags, const VkSampler* pImmutableSamplers = nullptr); // requires use of SUPPORT_INDEXING_EXT/SUPPORT_INDEXING_V1_2 on initLayout void setBindingFlags(uint32_t binding, VkDescriptorBindingFlags bindingFlags); - VkDescriptorSetLayout initLayout(VkDescriptorSetLayoutCreateFlags flags = 0, - DescriptorSupport supportFlags = DescriptorSupport::CORE_1_0); + VkDescriptorSetLayout initLayout(VkDescriptorSetLayoutCreateFlags flags = 0, + DescriptorSupport supportFlags = DescriptorSupport::CORE_1_0); // inits pool and immediately allocates all numSets-many DescriptorSets VkDescriptorPool initPool(uint32_t numAllocatedSets); // optionally generates a pipelinelayout for the descriptorsetlayout - VkPipelineLayout initPipeLayout(uint32_t numRanges = 0, - const VkPushConstantRange* ranges = nullptr, - VkPipelineLayoutCreateFlags flags = 0); + VkPipelineLayout initPipeLayout(uint32_t numRanges = 0, + const VkPushConstantRange* ranges = nullptr, + VkPipelineLayoutCreateFlags flags = 0); void deinitPool(); void deinitLayout(); @@ -492,196 +498,224 @@ class DescriptorSetContainer ////////////////////////////////////////////////////////////////////////// - VkDescriptorSet getSet(uint32_t dstSetIdx = 0) const { return m_descriptorSets[dstSetIdx]; } - const VkDescriptorSet* getSets(uint32_t dstSetIdx 
= 0) const { return m_descriptorSets.data() + dstSetIdx; } - uint32_t getSetsCount() const { return static_cast(m_descriptorSets.size()); } + VkDescriptorSet getSet(uint32_t dstSetIdx = 0) const { return m_descriptorSets[dstSetIdx]; } + const VkDescriptorSet* getSets(uint32_t dstSetIdx = 0) const { + return m_descriptorSets.data() + dstSetIdx; + } + uint32_t getSetsCount() const { return static_cast(m_descriptorSets.size()); } - VkDescriptorSetLayout getLayout() const { return m_layout; } - VkPipelineLayout getPipeLayout() const { return m_pipelineLayout; } + VkDescriptorSetLayout getLayout() const { return m_layout; } + VkPipelineLayout getPipeLayout() const { return m_pipelineLayout; } const DescriptorSetBindings& getBindings() const { return m_bindings; } - VkDevice getDevice() const { return m_device; } + VkDevice getDevice() const { return m_device; } ////////////////////////////////////////////////////////////////////////// // provide single element - VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorImageInfo* pImageInfo, uint32_t arrayElement = 0) const - { + VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, + const VkDescriptorImageInfo* pImageInfo, + uint32_t arrayElement = 0) const { return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pImageInfo, arrayElement); } - VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorBufferInfo* pBufferInfo, uint32_t arrayElement = 0) const - { + VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, + const VkDescriptorBufferInfo* pBufferInfo, + uint32_t arrayElement = 0) const { return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pBufferInfo, arrayElement); } - VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkBufferView* pTexelBufferView, uint32_t arrayElement = 0) const - { + VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, + const 
VkBufferView* pTexelBufferView, + uint32_t arrayElement = 0) const { return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pTexelBufferView, arrayElement); } #if VK_NV_ray_tracing - VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, - uint32_t dstBinding, + VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetAccelerationStructureNV* pAccel, - uint32_t arrayElement = 0) const - { + uint32_t arrayElement = 0) const { return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pAccel, arrayElement); } #endif #if VK_KHR_acceleration_structure - VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, - uint32_t dstBinding, + VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetAccelerationStructureKHR* pAccel, - uint32_t arrayElement = 0) const - { + uint32_t arrayElement = 0) const { return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pAccel, arrayElement); } #endif #if VK_EXT_inline_uniform_block - VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, - uint32_t dstBinding, + VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetInlineUniformBlockEXT* pInline, - uint32_t arrayElement = 0) const - { + uint32_t arrayElement = 0) const { return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pInline, arrayElement); } #endif // provide full array - VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorImageInfo* pImageInfo) const - { + VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, + const VkDescriptorImageInfo* pImageInfo) const { return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pImageInfo); } - VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorBufferInfo* pBufferInfo) const - { + VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, + const VkDescriptorBufferInfo* pBufferInfo) 
const { return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pBufferInfo); } - VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkBufferView* pTexelBufferView) const - { + VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, + const VkBufferView* pTexelBufferView) const { return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pTexelBufferView); } #if VK_NV_ray_tracing - VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetAccelerationStructureNV* pAccel) const - { + VkWriteDescriptorSet makeWriteArray( + uint32_t dstSetIdx, uint32_t dstBinding, + const VkWriteDescriptorSetAccelerationStructureNV* pAccel) const { return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pAccel); } #endif #if VK_KHR_acceleration_structure - VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetAccelerationStructureKHR* pAccel) const - { + VkWriteDescriptorSet makeWriteArray( + uint32_t dstSetIdx, uint32_t dstBinding, + const VkWriteDescriptorSetAccelerationStructureKHR* pAccel) const { return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pAccel); } #endif #if VK_EXT_inline_uniform_block - VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetInlineUniformBlockEXT* pInline) const - { + VkWriteDescriptorSet makeWriteArray( + uint32_t dstSetIdx, uint32_t dstBinding, + const VkWriteDescriptorSetInlineUniformBlockEXT* pInline) const { return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pInline); } #endif #ifdef VULKAN_HPP - void addBinding(uint32_t binding, // Slot to which the descriptor will be bound, corresponding to the layout - // binding index in the shader - vk::DescriptorType type, // Type of the bound descriptor(s) - uint32_t count, // Number of descriptors - vk::ShaderStageFlags stageFlags, // Shader stages at which the bound 
resources will be available - const vk::Sampler* pImmutableSampler = nullptr // Corresponding sampler, in case of textures - ) - { - m_bindings.addBinding({binding, static_cast(type), count, static_cast(stageFlags), + void addBinding( + uint32_t binding, // Slot to which the descriptor will be bound, corresponding to the layout + // binding index in the shader + vk::DescriptorType type, // Type of the bound descriptor(s) + uint32_t count, // Number of descriptors + vk::ShaderStageFlags + stageFlags, // Shader stages at which the bound resources will be available + const vk::Sampler* pImmutableSampler = nullptr // Corresponding sampler, in case of textures + ) { + m_bindings.addBinding({binding, + static_cast(type), + count, + static_cast(stageFlags), reinterpret_cast(pImmutableSampler)}); } - void setBindings(const std::vector& bindings) - { - m_bindings.setBindings(reinterpret_cast&>(bindings)); + void setBindings(const std::vector& bindings) { + m_bindings.setBindings( + reinterpret_cast&>(bindings)); } - void setBindingFlags(uint32_t binding, vk::DescriptorBindingFlags bindingFlags) - { + void setBindingFlags(uint32_t binding, vk::DescriptorBindingFlags bindingFlags) { m_bindings.setBindingFlags(binding, static_cast(bindingFlags)); } - vk::WriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const vk::DescriptorImageInfo* pImageInfo, uint32_t arrayElement = 0) const - { - return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, reinterpret_cast(pImageInfo), arrayElement); + vk::WriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, + const vk::DescriptorImageInfo* pImageInfo, + uint32_t arrayElement = 0) const { + return m_bindings.makeWrite(getSet(dstSetIdx), + dstBinding, + reinterpret_cast(pImageInfo), + arrayElement); } - vk::WriteDescriptorSet makeWrite(uint32_t dstSetIdx, - uint32_t dstBinding, + vk::WriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const vk::DescriptorBufferInfo* pBufferInfo, - 
uint32_t arrayElement = 0) const - { - return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, - reinterpret_cast(pBufferInfo), arrayElement); + uint32_t arrayElement = 0) const { + return m_bindings.makeWrite(getSet(dstSetIdx), + dstBinding, + reinterpret_cast(pBufferInfo), + arrayElement); } - vk::WriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const vk::BufferView* pTexelBufferView, uint32_t arrayElement = 0) const - { - return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, reinterpret_cast(pTexelBufferView), arrayElement); + vk::WriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, + const vk::BufferView* pTexelBufferView, + uint32_t arrayElement = 0) const { + return m_bindings.makeWrite(getSet(dstSetIdx), + dstBinding, + reinterpret_cast(pTexelBufferView), + arrayElement); } #if VK_NV_ray_tracing - vk::WriteDescriptorSet makeWrite(uint32_t dstSetIdx, - uint32_t dstBinding, + vk::WriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const vk::WriteDescriptorSetAccelerationStructureNV* pAccel, - uint32_t arrayElement = 0) const - { - return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, - reinterpret_cast(pAccel), arrayElement); + uint32_t arrayElement = 0) const { + return m_bindings.makeWrite( + getSet(dstSetIdx), + dstBinding, + reinterpret_cast(pAccel), + arrayElement); } #endif #if VK_KHR_acceleration_structure - vk::WriteDescriptorSet makeWrite(uint32_t dstSetIdx, - uint32_t dstBinding, + vk::WriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const vk::WriteDescriptorSetAccelerationStructureKHR* pAccel, - uint32_t arrayElement = 0) const - { - return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, - reinterpret_cast(pAccel), arrayElement); + uint32_t arrayElement = 0) const { + return m_bindings.makeWrite( + getSet(dstSetIdx), + dstBinding, + reinterpret_cast(pAccel), + arrayElement); } #endif - vk::WriteDescriptorSet makeWrite(uint32_t dstSetIdx, - uint32_t 
dstBinding, - const vk::WriteDescriptorSetInlineUniformBlockEXT* pInlineUniform, - uint32_t arrayElement = 0) const - { - return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, - reinterpret_cast(pInlineUniform), arrayElement); - } - vk::WriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const vk::DescriptorImageInfo* pImageInfo) const - { - return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, reinterpret_cast(pImageInfo)); - } - vk::WriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const vk::DescriptorBufferInfo* pBufferInfo) const - { - return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, reinterpret_cast(pBufferInfo)); - } - vk::WriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const vk::BufferView* pTexelBufferView) const - { - return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, reinterpret_cast(pTexelBufferView)); + vk::WriteDescriptorSet makeWrite( + uint32_t dstSetIdx, uint32_t dstBinding, + const vk::WriteDescriptorSetInlineUniformBlockEXT* pInlineUniform, + uint32_t arrayElement = 0) const { + return m_bindings.makeWrite( + getSet(dstSetIdx), + dstBinding, + reinterpret_cast(pInlineUniform), + arrayElement); + } + vk::WriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, + const vk::DescriptorImageInfo* pImageInfo) const { + return m_bindings.makeWriteArray( + getSet(dstSetIdx), dstBinding, reinterpret_cast(pImageInfo)); + } + vk::WriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, + const vk::DescriptorBufferInfo* pBufferInfo) const { + return m_bindings.makeWriteArray(getSet(dstSetIdx), + dstBinding, + reinterpret_cast(pBufferInfo)); + } + vk::WriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, + const vk::BufferView* pTexelBufferView) const { + return m_bindings.makeWriteArray( + getSet(dstSetIdx), dstBinding, reinterpret_cast(pTexelBufferView)); } #if VK_NV_ray_tracing - 
vk::WriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const vk::WriteDescriptorSetAccelerationStructureNV* pAccel) const - { - return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, - reinterpret_cast(pAccel)); + vk::WriteDescriptorSet makeWriteArray( + uint32_t dstSetIdx, uint32_t dstBinding, + const vk::WriteDescriptorSetAccelerationStructureNV* pAccel) const { + return m_bindings.makeWriteArray( + getSet(dstSetIdx), + dstBinding, + reinterpret_cast(pAccel)); } #endif #if VK_KHR_acceleration_structure - vk::WriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, - uint32_t dstBinding, - const vk::WriteDescriptorSetAccelerationStructureKHR* pAccel) const - { - return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, - reinterpret_cast(pAccel)); + vk::WriteDescriptorSet makeWriteArray( + uint32_t dstSetIdx, uint32_t dstBinding, + const vk::WriteDescriptorSetAccelerationStructureKHR* pAccel) const { + return m_bindings.makeWriteArray( + getSet(dstSetIdx), + dstBinding, + reinterpret_cast(pAccel)); } #endif #if VK_EXT_inline_uniform_block - vk::WriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const vk::WriteDescriptorSetInlineUniformBlockEXT* pInline) const - { - return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, - reinterpret_cast(pInline)); + vk::WriteDescriptorSet makeWriteArray( + uint32_t dstSetIdx, uint32_t dstBinding, + const vk::WriteDescriptorSetInlineUniformBlockEXT* pInline) const { + return m_bindings.makeWriteArray( + getSet(dstSetIdx), + dstBinding, + reinterpret_cast(pInline)); } #endif #endif -protected: - VkDevice m_device = VK_NULL_HANDLE; - VkDescriptorSetLayout m_layout = VK_NULL_HANDLE; - VkDescriptorPool m_pool = VK_NULL_HANDLE; - VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + protected: + VkDevice m_device = VK_NULL_HANDLE; + VkDescriptorSetLayout m_layout = VK_NULL_HANDLE; + VkDescriptorPool m_pool = VK_NULL_HANDLE; + VkPipelineLayout m_pipelineLayout = 
VK_NULL_HANDLE; std::vector m_descriptorSets = {}; - DescriptorSetBindings m_bindings = {}; + DescriptorSetBindings m_bindings = {}; }; ////////////////////////////////////////////////////////////////////////// @@ -732,13 +766,12 @@ writeUpdates.push_back(container.at(1).makeWrite(2, 0, &..)); vkCmdBindDescriptorSets(cmd, GRAPHICS, container.getPipeLayout(0), 0, 1, container.at(0).getSets()); .. -vkCmdBindDescriptorSets(cmd, GRAPHICS, container.getPipeLayout(1), 1, 1, container.at(1).getSets(7)); -\endcode +vkCmdBindDescriptorSets(cmd, GRAPHICS, container.getPipeLayout(1), 1, 1, +container.at(1).getSets(7)); \endcode */ template -class TDescriptorSetContainer -{ -public: +class TDescriptorSetContainer { + public: TDescriptorSetContainer() {} TDescriptorSetContainer(VkDevice device) { init(device); } ~TDescriptorSetContainer() { deinit(); } @@ -749,42 +782,35 @@ class TDescriptorSetContainer void deinitPools(); // pipelayout uses range of m_sets[0.. first null or SETS[ - VkPipelineLayout initPipeLayout(uint32_t pipe, - uint32_t numRanges = 0, - const VkPushConstantRange* ranges = nullptr, - VkPipelineLayoutCreateFlags flags = 0); + VkPipelineLayout initPipeLayout(uint32_t pipe, uint32_t numRanges = 0, + const VkPushConstantRange* ranges = nullptr, + VkPipelineLayoutCreateFlags flags = 0); // pipelayout uses range of m_sets[0..numDsets[ - VkPipelineLayout initPipeLayout(uint32_t pipe, - uint32_t numDsets, - uint32_t numRanges = 0, - const VkPushConstantRange* ranges = nullptr, - VkPipelineLayoutCreateFlags flags = 0); + VkPipelineLayout initPipeLayout(uint32_t pipe, uint32_t numDsets, uint32_t numRanges = 0, + const VkPushConstantRange* ranges = nullptr, + VkPipelineLayoutCreateFlags flags = 0); - DescriptorSetContainer& at(uint32_t set) { return m_sets[set]; } + DescriptorSetContainer& at(uint32_t set) { return m_sets[set]; } const DescriptorSetContainer& at(uint32_t set) const { return m_sets[set]; } - DescriptorSetContainer& operator[](uint32_t set) { return 
m_sets[set]; } + DescriptorSetContainer& operator[](uint32_t set) { return m_sets[set]; } const DescriptorSetContainer& operator[](uint32_t set) const { return m_sets[set]; } - VkPipelineLayout getPipeLayout(uint32_t pipe = 0) const - { + VkPipelineLayout getPipeLayout(uint32_t pipe = 0) const { assert(pipe <= PIPES); return m_pipelayouts[pipe]; } -protected: - VkPipelineLayout m_pipelayouts[PIPES] = {}; + protected: + VkPipelineLayout m_pipelayouts[PIPES] = {}; DescriptorSetContainer m_sets[SETS]; }; ////////////////////////////////////////////////////////////////////////// template -VkPipelineLayout TDescriptorSetContainer::initPipeLayout(uint32_t pipe, - uint32_t numDsets, - uint32_t numRanges /*= 0*/, - const VkPushConstantRange* ranges /*= nullptr*/, - VkPipelineLayoutCreateFlags flags /*= 0*/) -{ +VkPipelineLayout TDescriptorSetContainer::initPipeLayout( + uint32_t pipe, uint32_t numDsets, uint32_t numRanges /*= 0*/, + const VkPushConstantRange* ranges /*= nullptr*/, VkPipelineLayoutCreateFlags flags /*= 0*/) { assert(pipe <= uint32_t(PIPES)); assert(numDsets <= uint32_t(SETS)); assert(m_pipelayouts[pipe] == VK_NULL_HANDLE); @@ -792,19 +818,18 @@ VkPipelineLayout TDescriptorSetContainer::initPipeLayout(uint32_t VkDevice device = m_sets[0].getDevice(); VkDescriptorSetLayout setLayouts[SETS]; - for(uint32_t d = 0; d < numDsets; d++) - { + for (uint32_t d = 0; d < numDsets; d++) { setLayouts[d] = m_sets[d].getLayout(); assert(setLayouts[d]); } - VkResult result; + VkResult result; VkPipelineLayoutCreateInfo layoutCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; - layoutCreateInfo.setLayoutCount = numDsets; - layoutCreateInfo.pSetLayouts = setLayouts; - layoutCreateInfo.pushConstantRangeCount = numRanges; - layoutCreateInfo.pPushConstantRanges = ranges; - layoutCreateInfo.flags = flags; + layoutCreateInfo.setLayoutCount = numDsets; + layoutCreateInfo.pSetLayouts = setLayouts; + layoutCreateInfo.pushConstantRangeCount = numRanges; + 
layoutCreateInfo.pPushConstantRanges = ranges; + layoutCreateInfo.flags = flags; result = vkCreatePipelineLayout(device, &layoutCreateInfo, nullptr, &m_pipelayouts[pipe]); assert(result == VK_SUCCESS); @@ -812,32 +837,28 @@ VkPipelineLayout TDescriptorSetContainer::initPipeLayout(uint32_t } template -VkPipelineLayout TDescriptorSetContainer::initPipeLayout(uint32_t pipe, - uint32_t numRanges /*= 0*/, - const VkPushConstantRange* ranges /*= nullptr*/, - VkPipelineLayoutCreateFlags flags /*= 0*/) -{ +VkPipelineLayout TDescriptorSetContainer::initPipeLayout( + uint32_t pipe, uint32_t numRanges /*= 0*/, const VkPushConstantRange* ranges /*= nullptr*/, + VkPipelineLayoutCreateFlags flags /*= 0*/) { assert(pipe <= uint32_t(PIPES)); assert(m_pipelayouts[pipe] == VK_NULL_HANDLE); VkDevice device = m_sets[0].getDevice(); VkDescriptorSetLayout setLayouts[SETS]; - int used; - for(used = 0; used < SETS; used++) - { + int used; + for (used = 0; used < SETS; used++) { setLayouts[used] = m_sets[used].getLayout(); - if(!setLayouts[used]) - break; + if (!setLayouts[used]) break; } - VkResult result; + VkResult result; VkPipelineLayoutCreateInfo layoutCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO}; - layoutCreateInfo.setLayoutCount = uint32_t(used); - layoutCreateInfo.pSetLayouts = setLayouts; - layoutCreateInfo.pushConstantRangeCount = numRanges; - layoutCreateInfo.pPushConstantRanges = ranges; - layoutCreateInfo.flags = flags; + layoutCreateInfo.setLayoutCount = uint32_t(used); + layoutCreateInfo.pSetLayouts = setLayouts; + layoutCreateInfo.pushConstantRangeCount = numRanges; + layoutCreateInfo.pPushConstantRanges = ranges; + layoutCreateInfo.flags = flags; result = vkCreatePipelineLayout(device, &layoutCreateInfo, nullptr, &m_pipelayouts[pipe]); assert(result == VK_SUCCESS); @@ -845,48 +866,32 @@ VkPipelineLayout TDescriptorSetContainer::initPipeLayout(uint32_t } template -void TDescriptorSetContainer::deinitPools() -{ - for(int d = 0; d < SETS; d++) - { - 
m_sets[d].deinitPool(); - } +void TDescriptorSetContainer::deinitPools() { + for (int d = 0; d < SETS; d++) { m_sets[d].deinitPool(); } } template -void TDescriptorSetContainer::deinitLayouts() -{ +void TDescriptorSetContainer::deinitLayouts() { VkDevice device = m_sets[0].getDevice(); - for(int p = 0; p < PIPES; p++) - { - if(m_pipelayouts[p]) - { + for (int p = 0; p < PIPES; p++) { + if (m_pipelayouts[p]) { vkDestroyPipelineLayout(device, m_pipelayouts[p], nullptr); m_pipelayouts[p] = VK_NULL_HANDLE; } } - for(int d = 0; d < SETS; d++) - { - m_sets[d].deinitLayout(); - } + for (int d = 0; d < SETS; d++) { m_sets[d].deinitLayout(); } } template -void TDescriptorSetContainer::deinit() -{ +void TDescriptorSetContainer::deinit() { deinitPools(); deinitLayouts(); } template -void TDescriptorSetContainer::init(VkDevice device) -{ - for(int d = 0; d < SETS; d++) - { - m_sets[d].init(device); - } +void TDescriptorSetContainer::init(VkDevice device) { + for (int d = 0; d < SETS; d++) { m_sets[d].init(device); } } - } // namespace nvvk diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dedicated_vk.cpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dedicated_vk.cpp index 4997f09..3c15a03 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dedicated_vk.cpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dedicated_vk.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -89,13 +89,13 @@ void DedicatedMemoryAllocator::deinit() m_device = NULL; } -MemHandle DedicatedMemoryAllocator::allocMemory(const MemAllocateInfo& allocInfo, VkResult *pResult) +MemHandle DedicatedMemoryAllocator::allocMemory(const MemAllocateInfo& allocInfo, uint32_t plane, VkResult *pResult) { MemAllocateInfo localInfo(allocInfo); localInfo.setAllocationFlags(allocInfo.getAllocationFlags() | m_flags); BakedAllocateInfo bakedInfo; - fillBakedAllocateInfo(m_physicalMemoryProperties, localInfo, bakedInfo); + fillBakedAllocateInfo(m_physicalMemoryProperties, localInfo, plane, bakedInfo); VkDeviceMemory memory = VK_NULL_HANDLE; VkResult result = vkAllocateMemory(m_device, &bakedInfo.memAllocInfo, nullptr, &memory); diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dedicated_vk.hpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dedicated_vk.hpp index 018f86a..57a9b2c 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dedicated_vk.hpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dedicated_vk.hpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -41,7 +41,7 @@ class DedicatedMemoryAllocator : public MemAllocator bool init(VkDevice device, VkPhysicalDevice physDevice); void deinit(); - virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) override; + virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, uint32_t plane = 0, VkResult* pResult = nullptr) override; virtual void freeMemory(MemHandle memHandle) override; virtual MemInfo getMemoryInfo(MemHandle memHandle) const override; virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) override; diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dma_vk.hpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dma_vk.hpp index d6b1a8e..053b748 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dma_vk.hpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_dma_vk.hpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -46,7 +46,7 @@ class DMAMemoryAllocator : public MemAllocator void deinit() { m_dma = nullptr; } // Implement MemAllocator interface - virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) override { return m_dma->allocMemory(allocInfo, pResult); } + virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, uint32_t plane = 0, VkResult* pResult = nullptr) override { return m_dma->allocMemory(allocInfo, plane, pResult); } virtual void freeMemory(MemHandle memHandle) override { return m_dma->freeMemory(memHandle); } virtual MemInfo getMemoryInfo(MemHandle memHandle) const override { return m_dma->getMemoryInfo(memHandle); } virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) override { return m_dma->map(memHandle, offset, size, pResult); } diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_vk.cpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_vk.cpp index 008629b..94935a3 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_vk.cpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_vk.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -27,98 +27,84 @@ namespace nvvk { /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -MemAllocateInfo::MemAllocateInfo(const VkMemoryRequirements& memReqs, VkMemoryPropertyFlags memProps, bool isTilingOptimal) - : m_memReqs(memReqs) - , m_memProps(memProps) - , m_isTilingOptimal(isTilingOptimal) -{ -} +MemAllocateInfo::MemAllocateInfo(const VkMemoryRequirements& memReqs, + VkMemoryPropertyFlags memProps, bool isTilingOptimal) + : m_memReqs({memReqs}), m_memProps(memProps), m_isTilingOptimal(isTilingOptimal) {} + +MemAllocateInfo::MemAllocateInfo(const std::vector& memReqs, + VkMemoryPropertyFlags memProps, bool isTilingOptimal) + : m_memReqs(memReqs), m_memProps(memProps), m_isTilingOptimal(isTilingOptimal) {} -MemAllocateInfo::MemAllocateInfo(VkDevice device, VkBuffer buffer, VkMemoryPropertyFlags memProps) -{ - VkBufferMemoryRequirementsInfo2 bufferReqs = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, nullptr, buffer}; - VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; - VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, &dedicatedRegs}; +MemAllocateInfo::MemAllocateInfo(VkDevice device, VkBuffer buffer, VkMemoryPropertyFlags memProps) { + VkBufferMemoryRequirementsInfo2 bufferReqs = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, nullptr, buffer}; + VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; + VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, &dedicatedRegs}; vkGetBufferMemoryRequirements2(device, &bufferReqs, &memReqs); - m_memReqs = memReqs.memoryRequirements; + m_memReqs = {memReqs.memoryRequirements}; m_memProps = memProps; - 
if(dedicatedRegs.requiresDedicatedAllocation) - { - setDedicatedBuffer(buffer); - } + if (dedicatedRegs.requiresDedicatedAllocation) { setDedicatedBuffer(buffer); } setTilingOptimal(false); } -MemAllocateInfo::MemAllocateInfo(VkDevice device, VkImage image, VkMemoryPropertyFlags memProps) -{ - VkImageMemoryRequirementsInfo2 imageReqs = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, nullptr, image}; - VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; - VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, &dedicatedRegs}; +MemAllocateInfo::MemAllocateInfo(VkDevice device, VkImage image, VkMemoryPropertyFlags memProps) { + VkImageMemoryRequirementsInfo2 imageReqs = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, nullptr, image}; + VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; + VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, &dedicatedRegs}; vkGetImageMemoryRequirements2(device, &imageReqs, &memReqs); - m_memReqs = memReqs.memoryRequirements; + m_memReqs = {memReqs.memoryRequirements}; m_memProps = memProps; - if(dedicatedRegs.requiresDedicatedAllocation) - { - setDedicatedImage(image); - } + if (dedicatedRegs.requiresDedicatedAllocation) { setDedicatedImage(image); } setTilingOptimal(true); } -MemAllocateInfo& MemAllocateInfo::setDedicatedImage(VkImage image) -{ +MemAllocateInfo& MemAllocateInfo::setDedicatedImage(VkImage image) { assert(!m_dedicatedBuffer); m_dedicatedImage = image; return *this; } -MemAllocateInfo& MemAllocateInfo::setDedicatedBuffer(VkBuffer buffer) -{ +MemAllocateInfo& MemAllocateInfo::setDedicatedBuffer(VkBuffer buffer) { assert(!m_dedicatedImage); m_dedicatedBuffer = buffer; return *this; } -MemAllocateInfo& MemAllocateInfo::setAllocationFlags(VkMemoryAllocateFlags flags) -{ +MemAllocateInfo& MemAllocateInfo::setAllocationFlags(VkMemoryAllocateFlags flags) { m_allocateFlags |= flags; 
return *this; } -MemAllocateInfo& MemAllocateInfo::setDeviceMask(uint32_t mask) -{ +MemAllocateInfo& MemAllocateInfo::setDeviceMask(uint32_t mask) { m_deviceMask = mask; return *this; } - -MemAllocateInfo& MemAllocateInfo::setDebugName(const std::string& name) -{ +MemAllocateInfo& MemAllocateInfo::setDebugName(const std::string& name) { m_debugName = name; return *this; } -MemAllocateInfo& MemAllocateInfo::setExportable(bool exportable) -{ +MemAllocateInfo& MemAllocateInfo::setExportable(bool exportable) { m_isExportable = exportable; return *this; } -MemAllocateInfo& MemAllocateInfo::setTilingOptimal(bool isTilingOptimal) -{ +MemAllocateInfo& MemAllocateInfo::setTilingOptimal(bool isTilingOptimal) { m_isTilingOptimal = isTilingOptimal; return *this; } -MemAllocateInfo& MemAllocateInfo::setPriority(const float priority /*= 0.5f*/) -{ +MemAllocateInfo& MemAllocateInfo::setPriority(const float priority /*= 0.5f*/) { m_priority = priority; return *this; } @@ -127,12 +113,11 @@ MemAllocateInfo& MemAllocateInfo::setPriority(const float priority /*= 0.5f*/) /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -uint32_t getMemoryType(const VkPhysicalDeviceMemoryProperties& memoryProperties, uint32_t typeBits, const VkMemoryPropertyFlags& properties) -{ - for(uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++) - { - if(((typeBits & (1 << i)) > 0) && (memoryProperties.memoryTypes[i].propertyFlags & properties) == properties) - { +uint32_t getMemoryType(const VkPhysicalDeviceMemoryProperties& memoryProperties, uint32_t typeBits, + const VkMemoryPropertyFlags& properties) { + for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++) { + if (((typeBits & (1 << i)) > 0) && + (memoryProperties.memoryTypes[i].propertyFlags & properties) == properties) { return i; } } @@ -140,25 
+125,25 @@ uint32_t getMemoryType(const VkPhysicalDeviceMemoryProperties& memoryProperties, return ~0u; } -bool fillBakedAllocateInfo(const VkPhysicalDeviceMemoryProperties& physMemProps, const MemAllocateInfo& info, BakedAllocateInfo& baked) -{ - baked.memAllocInfo.allocationSize = info.getMemoryRequirements().size; +bool fillBakedAllocateInfo(const VkPhysicalDeviceMemoryProperties& physMemProps, + const MemAllocateInfo& info, uint32_t plane, BakedAllocateInfo& baked) { + const auto& memReqs = info.getMemoryRequirements(); + baked.memAllocInfo.allocationSize = memReqs[plane].size; baked.memAllocInfo.memoryTypeIndex = - getMemoryType(physMemProps, info.getMemoryRequirements().memoryTypeBits, info.getMemoryProperties()); + getMemoryType(physMemProps, memReqs[plane].memoryTypeBits, info.getMemoryProperties()); - // Put it last in the chain, so we can directly pass it into the DeviceMemoryAllocator::alloc function - if(info.getDedicatedBuffer() || info.getDedicatedImage()) - { + // Put it last in the chain, so we can directly pass it into the DeviceMemoryAllocator::alloc + // function + if (info.getDedicatedBuffer() || info.getDedicatedImage()) { baked.dedicatedInfo.pNext = baked.memAllocInfo.pNext; - baked.memAllocInfo.pNext = &baked.dedicatedInfo; + baked.memAllocInfo.pNext = &baked.dedicatedInfo; baked.dedicatedInfo.buffer = info.getDedicatedBuffer(); - baked.dedicatedInfo.image = info.getDedicatedImage(); + baked.dedicatedInfo.image = info.getDedicatedImage(); } - if(info.getExportable()) - { - baked.exportInfo.pNext = baked.memAllocInfo.pNext; + if (info.getExportable()) { + baked.exportInfo.pNext = baked.memAllocInfo.pNext; baked.memAllocInfo.pNext = &baked.exportInfo; #ifdef WIN32 baked.exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; @@ -167,18 +152,14 @@ bool fillBakedAllocateInfo(const VkPhysicalDeviceMemoryProperties& physMemProps, #endif } - if(info.getDeviceMask() || info.getAllocationFlags()) - { - baked.flagsInfo.pNext = 
baked.memAllocInfo.pNext; + if (info.getDeviceMask() || info.getAllocationFlags()) { + baked.flagsInfo.pNext = baked.memAllocInfo.pNext; baked.memAllocInfo.pNext = &baked.flagsInfo; - baked.flagsInfo.flags = info.getAllocationFlags(); + baked.flagsInfo.flags = info.getAllocationFlags(); baked.flagsInfo.deviceMask = info.getDeviceMask(); - if(baked.flagsInfo.deviceMask) - { - baked.flagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT; - } + if (baked.flagsInfo.deviceMask) { baked.flagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT; } } return true; diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_vk.hpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_vk.hpp index bf0473c..24708ea 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_vk.hpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/memallocator_vk.hpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ @@ -22,13 +22,13 @@ #include #include +#include namespace nvvk { class MemHandleBase; typedef MemHandleBase* MemHandle; static const MemHandle NullMemHandle = nullptr; - /** \class nvvk::MemHandle @@ -38,27 +38,43 @@ static const MemHandle NullMemHandle = nullptr; \class nvvk::MemAllocateInfo - nvvk::MemAllocateInfo is collecting almost all parameters a Vulkan allocation could potentially need. - This keeps MemAllocator's interface simple and extensible. + nvvk::MemAllocateInfo is collecting almost all parameters a Vulkan allocation could potentially + need. This keeps MemAllocator's interface simple and extensible. 
*/ -class MemAllocateInfo -{ -public: - MemAllocateInfo(const VkMemoryRequirements& memReqs, // determine size, alignment and memory type - VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, // determine device_local, host_visible, host coherent etc... - bool isTilingOptimal = false // determine if the allocation is going to be used for an VK_IMAGE_TILING_OPTIMAL image +class MemAllocateInfo { + public: + explicit MemAllocateInfo( + const VkMemoryRequirements& memReqs, // determine size, alignment and memory type + VkMemoryPropertyFlags memProps = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, // determine device_local, host_visible, host + // coherent etc... + bool isTilingOptimal = false // determine if the allocation is going to be used for an + // VK_IMAGE_TILING_OPTIMAL image + ); + explicit MemAllocateInfo( + const std::vector& + memReqs, // determine size, alignment and memory type + VkMemoryPropertyFlags memProps = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, // determine device_local, host_visible, host + // coherent etc... 
+ bool isTilingOptimal = false // determine if the allocation is going to be used for an + // VK_IMAGE_TILING_OPTIMAL image ); - // Convenience constructures that infer the allocation information from the Vulkan objects directly - MemAllocateInfo(VkDevice device, VkBuffer buffer, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - MemAllocateInfo(VkDevice device, VkImage image, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + // Convenience constructures that infer the allocation information from the Vulkan objects + // directly + MemAllocateInfo(VkDevice device, VkBuffer buffer, + VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + MemAllocateInfo(VkDevice device, VkImage image, + VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); // Determines which heap to allocate from MemAllocateInfo& setMemoryProperties(VkMemoryPropertyFlags flags); // Determines size and alignment MemAllocateInfo& setMemoryRequirements(VkMemoryRequirements requirements); - // TilingOptimal should be set for images. The allocator may choose to separate linear and tiling allocations + // TilingOptimal should be set for images. 
The allocator may choose to separate linear and tiling + // allocations MemAllocateInfo& setTilingOptimal(bool isTilingOptimal); // The allocation will be dedicated for the given image MemAllocateInfo& setDedicatedImage(VkImage image); @@ -66,7 +82,8 @@ class MemAllocateInfo MemAllocateInfo& setDedicatedBuffer(VkBuffer buffer); // Set additional allocation flags MemAllocateInfo& setAllocationFlags(VkMemoryAllocateFlags flags); - // Set the device mask for the allocation, redirect allocations to specific device(s) in the device group + // Set the device mask for the allocation, redirect allocations to specific device(s) in the + // device group MemAllocateInfo& setDeviceMask(uint32_t mask); // Set a name for the allocation (only useful for dedicated allocations or allocators) MemAllocateInfo& setDebugName(const std::string& name); @@ -75,26 +92,25 @@ class MemAllocateInfo // Prioritize the allocation (values 0.0 - 1.0); this may guide eviction strategies MemAllocateInfo& setPriority(const float priority = 0.5f); - VkImage getDedicatedImage() const { return m_dedicatedImage; } - VkBuffer getDedicatedBuffer() const { return m_dedicatedBuffer; } - VkMemoryAllocateFlags getAllocationFlags() const { return m_allocateFlags; } - uint32_t getDeviceMask() const { return m_deviceMask; } - bool getTilingOptimal() const { return m_isTilingOptimal; } - const VkMemoryRequirements& getMemoryRequirements() const { return m_memReqs; } + VkImage getDedicatedImage() const { return m_dedicatedImage; } + VkBuffer getDedicatedBuffer() const { return m_dedicatedBuffer; } + VkMemoryAllocateFlags getAllocationFlags() const { return m_allocateFlags; } + uint32_t getDeviceMask() const { return m_deviceMask; } + bool getTilingOptimal() const { return m_isTilingOptimal; } + const std::vector& getMemoryRequirements() const { return m_memReqs; } const VkMemoryPropertyFlags& getMemoryProperties() const { return m_memProps; } - std::string getDebugName() const { return m_debugName; } - bool 
getExportable() const { return m_isExportable; } - float getPriority() const { return m_priority; } - + std::string getDebugName() const { return m_debugName; } + bool getExportable() const { return m_isExportable; } + float getPriority() const { return m_priority; } -private: - VkBuffer m_dedicatedBuffer{VK_NULL_HANDLE}; - VkImage m_dedicatedImage{VK_NULL_HANDLE}; + private: + VkBuffer m_dedicatedBuffer{VK_NULL_HANDLE}; + VkImage m_dedicatedImage{VK_NULL_HANDLE}; VkMemoryAllocateFlags m_allocateFlags{0}; - uint32_t m_deviceMask{0}; - VkMemoryRequirements m_memReqs{0, 0, 0}; + uint32_t m_deviceMask{0}; + std::vector m_memReqs; VkMemoryPropertyFlags m_memProps{0}; - float m_priority{0.5f}; + float m_priority{0.5f}; std::string m_debugName; @@ -104,8 +120,7 @@ class MemAllocateInfo // BakedAllocateInfo is a group of allocation relevant Vulkan allocation structures, // which will be filled out and linked via pNext-> to be used directly via vkAllocateMemory. -struct BakedAllocateInfo -{ +struct BakedAllocateInfo { BakedAllocateInfo() = default; // In lieu of proper copy operators, need to delete them as we store @@ -113,17 +128,19 @@ struct BakedAllocateInfo // wrong or out-of-scope addresses BakedAllocateInfo(BakedAllocateInfo&& other) = delete; BakedAllocateInfo operator=(BakedAllocateInfo&& other) = delete; - BakedAllocateInfo(const BakedAllocateInfo&) = delete; + BakedAllocateInfo(const BakedAllocateInfo&) = delete; BakedAllocateInfo operator=(const BakedAllocateInfo) = delete; - VkMemoryAllocateInfo memAllocInfo{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; - VkMemoryAllocateFlagsInfo flagsInfo{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO}; + VkMemoryAllocateInfo memAllocInfo{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + VkMemoryAllocateFlagsInfo flagsInfo{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO}; VkMemoryDedicatedAllocateInfo dedicatedInfo{VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO}; - VkExportMemoryAllocateInfo 
exportInfo{VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO}; + VkExportMemoryAllocateInfo exportInfo{VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO}; }; -bool fillBakedAllocateInfo(const VkPhysicalDeviceMemoryProperties& physMemProps, const MemAllocateInfo& info, BakedAllocateInfo& baked); -uint32_t getMemoryType(const VkPhysicalDeviceMemoryProperties& memoryProperties, uint32_t typeBits, const VkMemoryPropertyFlags& properties); +bool fillBakedAllocateInfo(const VkPhysicalDeviceMemoryProperties& physMemProps, + const MemAllocateInfo& info, uint32_t plane, BakedAllocateInfo& baked); +uint32_t getMemoryType(const VkPhysicalDeviceMemoryProperties& memoryProperties, uint32_t typeBits, + const VkMemoryPropertyFlags& properties); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -138,23 +155,21 @@ uint32_t getMemoryType(const VkPhysicalDeviceMemoryProperties& memoryProperties, They can implement the allocator dunctionality themselves or act as an adapter to another memory allocator implementation. - A nvvk::MemAllocator hands out opaque 'MemHandles'. The implementation of the MemAllocator interface - may chose any type of payload to store in a MemHandle. A MemHandle's relevant information can be - retrieved via getMemoryInfo(). + A nvvk::MemAllocator hands out opaque 'MemHandles'. The implementation of the MemAllocator + interface may chose any type of payload to store in a MemHandle. A MemHandle's relevant information + can be retrieved via getMemoryInfo(). */ -class MemAllocator -{ -public: - struct MemInfo - { +class MemAllocator { + public: + struct MemInfo { VkDeviceMemory memory; - VkDeviceSize offset; - VkDeviceSize size; + VkDeviceSize offset; + VkDeviceSize size; }; // Allocate a piece of memory according to the requirements of allocInfo. 
// may return NullMemHandle on error (provide pResult for details) - virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) = 0; + virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, uint32_t plane = 0, VkResult* pResult = nullptr) = 0; // Free the memory backing 'memHandle'. // memHandle may be nullptr; @@ -167,19 +182,19 @@ class MemAllocator // If 'memHandle' already refers to a suballocation 'offset' will be applied on top of the // suballocation's offset inside the device memory. // may return nullptr on error (provide pResult for details) - virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) = 0; + virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, + VkResult* pResult = nullptr) = 0; // Unmap memHandle virtual void unmap(MemHandle memHandle) = 0; // Convenience function to allow mapping straight to a typed pointer. template - T* mapT(MemHandle memHandle, VkResult* pResult = nullptr) - { + T* mapT(MemHandle memHandle, VkResult* pResult = nullptr) { return (T*)map(memHandle, 0, VK_WHOLE_SIZE, pResult); } - virtual VkDevice getDevice() const = 0; + virtual VkDevice getDevice() const = 0; virtual VkPhysicalDevice getPhysicalDevice() const = 0; // Make sure the dtor is virtual @@ -188,9 +203,8 @@ class MemAllocator // Base class for memory handles // Individual allocators will derive from it and fill the handles with their own data. 
-class MemHandleBase -{ -public: +class MemHandleBase { + public: virtual ~MemHandleBase() = default; // force the class to become virtual }; diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/memorymanagement_vk.cpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/memorymanagement_vk.cpp index 1b74d01..c5ab9ae 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/memorymanagement_vk.cpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/memorymanagement_vk.cpp @@ -13,11 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ - #include #include @@ -27,46 +26,46 @@ #include "nvh/nvprint.hpp" namespace nvvk { -static VkExportMemoryAllocateInfo memoryHandleEx{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT}; // #VKGL Special for Interop +static VkExportMemoryAllocateInfo memoryHandleEx{ + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + nullptr, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT}; // #VKGL Special for Interop bool getMemoryInfo(const VkPhysicalDeviceMemoryProperties& memoryProperties, - const VkMemoryRequirements& memReqs, - VkMemoryPropertyFlags properties, - VkMemoryAllocateInfo& memInfo, - bool preferDevice) -{ + const VkMemoryRequirements& memReqs, VkMemoryPropertyFlags properties, + VkMemoryAllocateInfo& memInfo, bool preferDevice) { memInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; memInfo.pNext = nullptr; - if(!memReqs.size) - { - memInfo.allocationSize = 0; + if (!memReqs.size) { + memInfo.allocationSize = 0; memInfo.memoryTypeIndex = ~0; return true; } // Find an available memory type that satisfies the requested properties. 
- for(uint32_t memoryTypeIndex = 0; memoryTypeIndex < memoryProperties.memoryTypeCount; ++memoryTypeIndex) - { - if((memReqs.memoryTypeBits & (1 << memoryTypeIndex)) - // either there is a propertyFlags that also includes the combinations - && ((properties && (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags & properties) == properties) - // or it directly matches the properties (zero case) - || (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags == properties))) - { - memInfo.allocationSize = memReqs.size; + for (uint32_t memoryTypeIndex = 0; memoryTypeIndex < memoryProperties.memoryTypeCount; + ++memoryTypeIndex) { + if ((memReqs.memoryTypeBits & (1 << memoryTypeIndex)) + // either there is a propertyFlags that also includes the combinations + && ((properties && (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags & + properties) == properties) + // or it directly matches the properties (zero case) + || (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags == properties))) { + memInfo.allocationSize = memReqs.size; memInfo.memoryTypeIndex = memoryTypeIndex; return true; } } // special case zero flag logic - if(properties == 0) - { + if (properties == 0) { // prefer something with host visible - return getMemoryInfo(memoryProperties, memReqs, - preferDevice ? VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, memInfo); + return getMemoryInfo( + memoryProperties, + memReqs, + preferDevice ? 
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + memInfo); } return false; @@ -76,32 +75,26 @@ bool getMemoryInfo(const VkPhysicalDeviceMemoryProperties& memoryProperties, class DMAMemoryAllocator; -class DMAMemoryHandle : public nvvk::MemHandleBase -{ -public: - DMAMemoryHandle() = default; +class DMAMemoryHandle : public nvvk::MemHandleBase { + public: + DMAMemoryHandle() = default; DMAMemoryHandle(const DMAMemoryHandle&) = default; - DMAMemoryHandle(DMAMemoryHandle&&) = default; + DMAMemoryHandle(DMAMemoryHandle&&) = default; DMAMemoryHandle& operator=(const DMAMemoryHandle&) = default; DMAMemoryHandle& operator=(DMAMemoryHandle&&) = default; const AllocationID& getAllocationID() const { return m_allocation; }; -private: + private: friend class nvvk::DeviceMemoryAllocator; - DMAMemoryHandle(const AllocationID& allocation) - : m_allocation(allocation) - { - } + DMAMemoryHandle(const AllocationID& allocation) : m_allocation(allocation) {} AllocationID m_allocation; }; -DMAMemoryHandle* castDMAMemoryHandle(MemHandle memHandle) -{ - if(!memHandle) - return nullptr; +DMAMemoryHandle* castDMAMemoryHandle(MemHandle memHandle) { + if (!memHandle) return nullptr; #ifndef NDEBUG auto dmaMemHandle = static_cast(memHandle); #else @@ -112,10 +105,10 @@ DMAMemoryHandle* castDMAMemoryHandle(MemHandle memHandle) return dmaMemHandle; } -MemHandle DeviceMemoryAllocator::allocMemory(const MemAllocateInfo& allocInfo, VkResult *pResult) -{ +MemHandle DeviceMemoryAllocator::allocMemory(const MemAllocateInfo& allocInfo, uint32_t plane, + VkResult* pResult) { BakedAllocateInfo bakedInfo; - fillBakedAllocateInfo(getMemoryProperties(), allocInfo, bakedInfo); + fillBakedAllocateInfo(getMemoryProperties(), allocInfo, plane, bakedInfo); State state = m_defaultState; state.allocateDeviceMask |= bakedInfo.flagsInfo.deviceMask; state.allocateFlags |= bakedInfo.flagsInfo.flags; @@ -124,38 +117,35 @@ MemHandle DeviceMemoryAllocator::allocMemory(const MemAllocateInfo& 
allocInfo, V VkResult result; bool isDedicatedAllocation = allocInfo.getDedicatedBuffer() || allocInfo.getDedicatedImage(); - auto dmaHandle = allocInternal(allocInfo.getMemoryRequirements(), allocInfo.getMemoryProperties(), - !allocInfo.getTilingOptimal() /*isLinear*/, - isDedicatedAllocation ? &bakedInfo.dedicatedInfo : nullptr, result, true, state); + auto dmaHandle = allocInternal(allocInfo.getMemoryRequirements()[plane], + allocInfo.getMemoryProperties(), + !allocInfo.getTilingOptimal() /*isLinear*/, + isDedicatedAllocation ? &bakedInfo.dedicatedInfo : nullptr, + result, + true, + state); - if (pResult) - { - *pResult = result; - } + if (pResult) { *pResult = result; } - if (dmaHandle) - { + if (dmaHandle) { DMAMemoryHandle* dmaMemHandle = new DMAMemoryHandle(dmaHandle); // Cannot do this, it would override the DeviceMemoryManager's chosen block buffer name // if(!allocInfo.getDebugName().empty()) // { // const MemInfo& memInfo = getMemoryInfo(dmaMemHandle); - // nvvk::DebugUtil(m_dma.getDevice()).setObjectName(memInfo.memory, allocInfo.getDebugName()); + // nvvk::DebugUtil(m_dma.getDevice()).setObjectName(memInfo.memory, + // allocInfo.getDebugName()); // } return dmaMemHandle; - } - else - { + } else { return NullMemHandle; } } -void DeviceMemoryAllocator::freeMemory(MemHandle memHandle) -{ - if(!memHandle) - return; +void DeviceMemoryAllocator::freeMemory(MemHandle memHandle) { + if (!memHandle) return; auto dmaHandle = castDMAMemoryHandle(memHandle); assert(dmaHandle); @@ -167,32 +157,29 @@ void DeviceMemoryAllocator::freeMemory(MemHandle memHandle) return; } -MemAllocator::MemInfo DeviceMemoryAllocator::getMemoryInfo(MemHandle memHandle) const -{ +MemAllocator::MemInfo DeviceMemoryAllocator::getMemoryInfo(MemHandle memHandle) const { MemInfo info; auto dmaHandle = castDMAMemoryHandle(memHandle); assert(dmaHandle); auto& allocInfo = getAllocation(dmaHandle->getAllocationID()); - info.memory = allocInfo.mem; - info.offset = allocInfo.offset; - info.size = 
allocInfo.size; + info.memory = allocInfo.mem; + info.offset = allocInfo.offset; + info.size = allocInfo.size; return info; }; -nvvk::AllocationID DeviceMemoryAllocator::getAllocationID(MemHandle memHandle) const -{ +nvvk::AllocationID DeviceMemoryAllocator::getAllocationID(MemHandle memHandle) const { auto dmaHandle = castDMAMemoryHandle(memHandle); assert(dmaHandle); return dmaHandle->getAllocationID(); } - -void* DeviceMemoryAllocator::map(MemHandle memHandle, VkDeviceSize offset, VkDeviceSize size, VkResult *pResult) -{ +void* DeviceMemoryAllocator::map(MemHandle memHandle, VkDeviceSize offset, VkDeviceSize size, + VkResult* pResult) { auto dmaHandle = castDMAMemoryHandle(memHandle); assert(dmaHandle); @@ -200,8 +187,7 @@ void* DeviceMemoryAllocator::map(MemHandle memHandle, VkDeviceSize offset, VkDev return ptr; } -void DeviceMemoryAllocator::unmap(MemHandle memHandle) -{ +void DeviceMemoryAllocator::unmap(MemHandle memHandle) { auto dmaHandle = castDMAMemoryHandle(memHandle); assert(dmaHandle); @@ -213,26 +199,24 @@ const VkMemoryDedicatedAllocateInfo* DeviceMemoryAllocator::DEDICATED_PROXY = int DeviceMemoryAllocator::s_allocDebugBias = 0; -//#define DEBUG_ALLOCID 8 +// #define DEBUG_ALLOCID 8 -nvvk::AllocationID DeviceMemoryAllocator::createID(Allocation& allocation, BlockID block, uint32_t blockOffset, uint32_t blockSize) -{ +nvvk::AllocationID DeviceMemoryAllocator::createID(Allocation& allocation, BlockID block, + uint32_t blockOffset, uint32_t blockSize) { // find free slot - if(m_freeAllocationIndex != INVALID_ID_INDEX) - { + if (m_freeAllocationIndex != INVALID_ID_INDEX) { uint32_t index = m_freeAllocationIndex; - m_freeAllocationIndex = m_allocations[index].id.instantiate((uint32_t)index); - m_allocations[index].allocation = allocation; - m_allocations[index].block = block; + m_freeAllocationIndex = m_allocations[index].id.instantiate((uint32_t)index); + m_allocations[index].allocation = allocation; + m_allocations[index].block = block; 
m_allocations[index].blockOffset = blockOffset; - m_allocations[index].blockSize = blockSize; + m_allocations[index].blockSize = blockSize; #if DEBUG_ALLOCID // debug some specific id, useful to track allocation leaks - if(index == DEBUG_ALLOCID) - { + if (index == DEBUG_ALLOCID) { int breakHere = 0; - breakHere = breakHere; + breakHere = breakHere; } #endif return m_allocations[index].id; @@ -242,34 +226,31 @@ nvvk::AllocationID DeviceMemoryAllocator::createID(Allocation& allocation, Block AllocationInfo info; info.allocation = allocation; info.id.instantiate((uint32_t)m_allocations.size()); - info.block = block; + info.block = block; info.blockOffset = blockOffset; - info.blockSize = blockSize; + info.blockSize = blockSize; m_allocations.push_back(info); #if DEBUG_ALLOCID // debug some specific id, useful to track allocation leaks - if(info.id.index == DEBUG_ALLOCID) - { + if (info.id.index == DEBUG_ALLOCID) { int breakHere = 0; - breakHere = breakHere; + breakHere = breakHere; } #endif return info.id; } -void DeviceMemoryAllocator::destroyID(AllocationID id) -{ +void DeviceMemoryAllocator::destroyID(AllocationID id) { assert(m_allocations[id.index].id.isEqual(id)); #if DEBUG_ALLOCID // debug some specific id, useful to track allocation leaks - if(id.index == DEBUG_ALLOCID) - { + if (id.index == DEBUG_ALLOCID) { int breakHere = 0; - breakHere = breakHere; + breakHere = breakHere; } #endif @@ -280,39 +261,34 @@ void DeviceMemoryAllocator::destroyID(AllocationID id) const float DeviceMemoryAllocator::DEFAULT_PRIORITY = 0.5f; -void DeviceMemoryAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize blockSize, VkDeviceSize maxSize) -{ +void DeviceMemoryAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize blockSize, VkDeviceSize maxSize) { assert(!m_device); - m_device = device; + m_device = device; m_physicalDevice = physicalDevice; // always default to NVVK_DEFAULT_MEMORY_BLOCKSIZE - m_blockSize = blockSize ? 
blockSize : NVVK_DEFAULT_MEMORY_BLOCKSIZE; + m_blockSize = blockSize ? blockSize : NVVK_DEFAULT_MEMORY_BLOCKSIZE; vkGetPhysicalDeviceMemoryProperties(physicalDevice, &m_memoryProperties); // Retrieving the max allocation size, can be lowered with maxSize - VkPhysicalDeviceProperties2 prop2{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; - VkPhysicalDeviceMaintenance3Properties vkProp{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES}; + VkPhysicalDeviceProperties2 prop2{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; + VkPhysicalDeviceMaintenance3Properties vkProp{ + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES}; prop2.pNext = &vkProp; vkGetPhysicalDeviceProperties2(physicalDevice, &prop2); - m_maxAllocationSize = maxSize > 0 ? std::min(maxSize, vkProp.maxMemoryAllocationSize) : vkProp.maxMemoryAllocationSize; - + m_maxAllocationSize = maxSize > 0 ? std::min(maxSize, vkProp.maxMemoryAllocationSize) + : vkProp.maxMemoryAllocationSize; assert(m_blocks.empty()); assert(m_allocations.empty()); } -void DeviceMemoryAllocator::freeAll() -{ - for(const auto& it : m_blocks) - { - if(!it.mem) - continue; +void DeviceMemoryAllocator::freeAll() { + for (const auto& it : m_blocks) { + if (!it.mem) continue; - if(it.mapped) - { - vkUnmapMemory(m_device, it.mem); - } + if (it.mapped) { vkUnmapMemory(m_device, it.mem); } vkFreeMemory(m_device, it.mem, nullptr); } @@ -320,42 +296,29 @@ void DeviceMemoryAllocator::freeAll() m_blocks.clear(); resizeBlocks(0); - m_freeBlockIndex = INVALID_ID_INDEX; + m_freeBlockIndex = INVALID_ID_INDEX; m_freeAllocationIndex = INVALID_ID_INDEX; } -void DeviceMemoryAllocator::deinit() -{ - if(!m_device) - return; +void DeviceMemoryAllocator::deinit() { + if (!m_device) return; - for(const auto& it : m_blocks) - { - if(it.mapped) - { + for (const auto& it : m_blocks) { + if (it.mapped) { assert("not all blocks were unmapped properly"); - if(it.mem) - { - vkUnmapMemory(m_device, it.mem); - } + if (it.mem) { 
vkUnmapMemory(m_device, it.mem); } } - if(it.mem) - { - if(it.isFirst && m_keepFirst) - { + if (it.mem) { + if (it.isFirst && m_keepFirst) { vkFreeMemory(m_device, it.mem, nullptr); - } - else - { + } else { assert("not all blocks were freed properly"); } } } - for(size_t i = 0; i < m_allocations.size(); i++) - { - if(m_allocations[i].id.index == (uint32_t)i) - { + for (size_t i = 0; i < m_allocations.size(); i++) { + if (m_allocations[i].id.index == (uint32_t)i) { assert(0 && i && "AllocationID not freed"); // set DEBUG_ALLOCID define further up to trace this id @@ -366,38 +329,34 @@ void DeviceMemoryAllocator::deinit() m_blocks.clear(); resizeBlocks(0); - m_freeBlockIndex = INVALID_ID_INDEX; + m_freeBlockIndex = INVALID_ID_INDEX; m_freeAllocationIndex = INVALID_ID_INDEX; - m_device = VK_NULL_HANDLE; + m_device = VK_NULL_HANDLE; } -VkDeviceSize DeviceMemoryAllocator::getMaxAllocationSize() const -{ +VkDeviceSize DeviceMemoryAllocator::getMaxAllocationSize() const { return m_maxAllocationSize; } -float DeviceMemoryAllocator::getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const -{ +float DeviceMemoryAllocator::getUtilization(VkDeviceSize& allocatedSize, + VkDeviceSize& usedSize) const { allocatedSize = m_allocatedSize; - usedSize = m_usedSize; + usedSize = m_usedSize; return float(double(usedSize) / double(allocatedSize)); } -void DeviceMemoryAllocator::nvprintReport() const -{ - VkDeviceSize used[VK_MAX_MEMORY_HEAPS] = {0}; +void DeviceMemoryAllocator::nvprintReport() const { + VkDeviceSize used[VK_MAX_MEMORY_HEAPS] = {0}; VkDeviceSize allocated[VK_MAX_MEMORY_HEAPS] = {0}; - uint32_t active[VK_MAX_MEMORY_HEAPS] = {0}; - uint32_t dedicated[VK_MAX_MEMORY_HEAPS] = {0}; - uint32_t linear[VK_MAX_MEMORY_HEAPS] = {0}; + uint32_t active[VK_MAX_MEMORY_HEAPS] = {0}; + uint32_t dedicated[VK_MAX_MEMORY_HEAPS] = {0}; + uint32_t linear[VK_MAX_MEMORY_HEAPS] = {0}; uint32_t dedicatedSum = 0; - uint32_t linearSum = 0; - for(const auto& block : m_blocks) - { - 
if(block.mem) - { + uint32_t linearSum = 0; + for (const auto& block : m_blocks) { + if (block.mem) { uint32_t heapIndex = m_memoryProperties.memoryTypes[block.memoryTypeIndex].heapIndex; used[heapIndex] += block.usedSize; allocated[heapIndex] += block.allocationSize; @@ -412,12 +371,13 @@ void DeviceMemoryAllocator::nvprintReport() const } LOGI("nvvk::DeviceMemoryAllocator %p\n", this); - { - LOGI(" count : dedicated, linear, all (device-local)\n"); - } - for(uint32_t i = 0; i < m_memoryProperties.memoryHeapCount; i++) - { - LOGI(" heap%d : %9d, %6d, %4d (%d)\n", i, dedicated[i], linear[i], active[i], + { LOGI(" count : dedicated, linear, all (device-local)\n"); } + for (uint32_t i = 0; i < m_memoryProperties.memoryHeapCount; i++) { + LOGI(" heap%d : %9d, %6d, %4d (%d)\n", + i, + dedicated[i], + linear[i], + active[i], (m_memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) ? 1 : 0); } @@ -425,29 +385,30 @@ void DeviceMemoryAllocator::nvprintReport() const LOGI(" total : %9d, %6d, %4d\n", dedicatedSum, linearSum, m_activeBlockCount); LOGI(" size : used / allocated / available KB (device-local)\n"); } - for(uint32_t i = 0; i < m_memoryProperties.memoryHeapCount; i++) - { - LOGI(" heap%d : %9d / %9d / %9d (%d)\n", i, uint32_t((used[i] + 1023) / 1024), - uint32_t((allocated[i] + 1023) / 1024), uint32_t((m_memoryProperties.memoryHeaps[i].size + 1023) / 1024), + for (uint32_t i = 0; i < m_memoryProperties.memoryHeapCount; i++) { + LOGI(" heap%d : %9d / %9d / %9d (%d)\n", + i, + uint32_t((used[i] + 1023) / 1024), + uint32_t((allocated[i] + 1023) / 1024), + uint32_t((m_memoryProperties.memoryHeaps[i].size + 1023) / 1024), (m_memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) ? 
1 : 0); } { - LOGI(" total : %9d / %9d KB (%d percent)\n\n", uint32_t((m_usedSize + 1023) / 1024), - uint32_t((m_allocatedSize + 1023) / 1024), uint32_t(double(m_usedSize) * 100.0 / double(m_allocatedSize))); + LOGI(" total : %9d / %9d KB (%d percent)\n\n", + uint32_t((m_usedSize + 1023) / 1024), + uint32_t((m_allocatedSize + 1023) / 1024), + uint32_t(double(m_usedSize) * 100.0 / double(m_allocatedSize))); } } -void DeviceMemoryAllocator::getTypeStats(uint32_t count[VK_MAX_MEMORY_TYPES], +void DeviceMemoryAllocator::getTypeStats(uint32_t count[VK_MAX_MEMORY_TYPES], VkDeviceSize used[VK_MAX_MEMORY_TYPES], - VkDeviceSize allocated[VK_MAX_MEMORY_TYPES]) const -{ + VkDeviceSize allocated[VK_MAX_MEMORY_TYPES]) const { memset(used, 0, sizeof(used[0]) * VK_MAX_MEMORY_TYPES); memset(allocated, 0, sizeof(allocated[0]) * VK_MAX_MEMORY_TYPES); - for(const auto& block : m_blocks) - { - if(block.mem) - { + for (const auto& block : m_blocks) { + if (block.mem) { count[block.memoryTypeIndex]++; used[block.memoryTypeIndex] += block.usedSize; allocated[block.memoryTypeIndex] += block.allocationSize; @@ -455,61 +416,53 @@ void DeviceMemoryAllocator::getTypeStats(uint32_t count[VK_MAX_MEMORY_TYPES] } } -VkDevice DeviceMemoryAllocator::getDevice() const -{ +VkDevice DeviceMemoryAllocator::getDevice() const { return m_device; } -VkPhysicalDevice DeviceMemoryAllocator::getPhysicalDevice() const -{ +VkPhysicalDevice DeviceMemoryAllocator::getPhysicalDevice() const { return m_physicalDevice; } -const nvvk::Allocation& DeviceMemoryAllocator::getAllocation(AllocationID id) const -{ +const nvvk::Allocation& DeviceMemoryAllocator::getAllocation(AllocationID id) const { assert(m_allocations[id.index].id.isEqual(id)); return m_allocations[id.index].allocation; } -const VkPhysicalDeviceMemoryProperties& DeviceMemoryAllocator::getMemoryProperties() const -{ +const VkPhysicalDeviceMemoryProperties& DeviceMemoryAllocator::getMemoryProperties() const { return m_memoryProperties; } -AllocationID 
DeviceMemoryAllocator::allocInternal(const VkMemoryRequirements& memReqs, - VkMemoryPropertyFlags memProps, - bool isLinear, +AllocationID DeviceMemoryAllocator::allocInternal(const VkMemoryRequirements& memReqs, + VkMemoryPropertyFlags memProps, bool isLinear, const VkMemoryDedicatedAllocateInfo* dedicated, - VkResult& result, - bool preferDevice, - const State& state) -{ + VkResult& result, bool preferDevice, + const State& state) { VkMemoryAllocateInfo memInfo; result = VK_SUCCESS; // Fill out allocation info structure - if(memReqs.size > m_maxAllocationSize || !nvvk::getMemoryInfo(m_memoryProperties, memReqs, memProps, memInfo, preferDevice)) - { + if (memReqs.size > m_maxAllocationSize || + !nvvk::getMemoryInfo(m_memoryProperties, memReqs, memProps, memInfo, preferDevice)) { result = VK_ERROR_OUT_OF_DEVICE_MEMORY; return AllocationID(); } float priority = m_supportsPriority ? state.priority : DEFAULT_PRIORITY; - bool isFirst = !dedicated; + bool isFirst = !dedicated; - if(!dedicated) - { + if (!dedicated) { // First try to find an existing memory block that we can use - for(uint32_t i = 0; i < (uint32_t)m_blocks.size(); i++) - { + for (uint32_t i = 0; i < (uint32_t)m_blocks.size(); i++) { Block& block = m_blocks[i]; // Ignore invalid or blocks with the wrong memory type - if(!block.mem || block.memoryTypeIndex != memInfo.memoryTypeIndex || isLinear != block.isLinear || block.priority != priority - || block.allocateFlags != state.allocateFlags || block.allocateDeviceMask != state.allocateDeviceMask) - { + if (!block.mem || block.memoryTypeIndex != memInfo.memoryTypeIndex || + isLinear != block.isLinear || block.priority != priority || + block.allocateFlags != state.allocateFlags || + block.allocateDeviceMask != state.allocateDeviceMask) { continue; } @@ -520,18 +473,20 @@ AllocationID DeviceMemoryAllocator::allocInternal(const VkMemoryRequirements& uint32_t blockOffset; uint32_t offset; - // Look for a block which has enough free space available - 
if(block.range.subAllocate((uint32_t)memReqs.size, (uint32_t)memReqs.alignment, blockOffset, offset, blockSize)) - { + if (block.range.subAllocate((uint32_t)memReqs.size, + (uint32_t)memReqs.alignment, + blockOffset, + offset, + blockSize)) { block.allocationCount++; block.usedSize += blockSize; Allocation allocation; - allocation.mem = block.mem; + allocation.mem = block.mem; allocation.offset = offset; - allocation.size = memReqs.size; + allocation.size = memReqs.size; m_usedSize += blockSize; @@ -542,14 +497,11 @@ AllocationID DeviceMemoryAllocator::allocInternal(const VkMemoryRequirements& // find available blockID or create new one BlockID id; - if(m_freeBlockIndex != INVALID_ID_INDEX) - { - Block& block = m_blocks[m_freeBlockIndex]; + if (m_freeBlockIndex != INVALID_ID_INDEX) { + Block& block = m_blocks[m_freeBlockIndex]; m_freeBlockIndex = block.id.instantiate(m_freeBlockIndex); - id = block.id; - } - else - { + id = block.id; + } else { uint32_t newIndex = (uint32_t)m_blocks.size(); m_blocks.resize(m_blocks.size() + 1); resizeBlocks(newIndex + 1); @@ -561,45 +513,39 @@ AllocationID DeviceMemoryAllocator::allocInternal(const VkMemoryRequirements& Block& block = m_blocks[id.index]; // enforce custom block under certain conditions - if(dedicated == DEDICATED_PROXY || memReqs.size > ((m_blockSize * 2) / 3)) - { + if (dedicated == DEDICATED_PROXY || memReqs.size > ((m_blockSize * 2) / 3)) { block.allocationSize = memReqs.size; - } - else if(dedicated) - { + } else if (dedicated) { block.allocationSize = memReqs.size; - memInfo.pNext = dedicated; - } - else - { + memInfo.pNext = dedicated; + } else { block.allocationSize = std::max(m_blockSize, memReqs.size); } - VkMemoryPriorityAllocateInfoEXT memPriority = {VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT}; - if(priority != DEFAULT_PRIORITY) - { - memPriority.pNext = memInfo.pNext; + VkMemoryPriorityAllocateInfoEXT memPriority = { + VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT}; + if (priority != 
DEFAULT_PRIORITY) { + memPriority.pNext = memInfo.pNext; memPriority.priority = priority; - memInfo.pNext = &memPriority; + memInfo.pNext = &memPriority; } VkMemoryAllocateFlagsInfo memFlags = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO}; - if(state.allocateFlags) - { - memFlags.pNext = memInfo.pNext; + if (state.allocateFlags) { + memFlags.pNext = memInfo.pNext; memFlags.deviceMask = state.allocateDeviceMask; - memFlags.flags = state.allocateFlags; - memInfo.pNext = &memFlags; + memFlags.flags = state.allocateFlags; + memInfo.pNext = &memFlags; } - block.allocationSize = block.range.alignedSize((uint32_t)block.allocationSize); - block.priority = priority; + block.allocationSize = block.range.alignedSize((uint32_t)block.allocationSize); + block.priority = priority; block.memoryTypeIndex = memInfo.memoryTypeIndex; block.range.init((uint32_t)block.allocationSize); - block.isLinear = isLinear; - block.isFirst = isFirst; - block.isDedicated = dedicated != nullptr; - block.allocateFlags = state.allocateFlags; + block.isLinear = isLinear; + block.isFirst = isFirst; + block.isDedicated = dedicated != nullptr; + block.allocateFlags = state.allocateFlags; block.allocateDeviceMask = state.allocateDeviceMask; // set allocationSize from aligned block.allocationSize @@ -607,8 +553,7 @@ AllocationID DeviceMemoryAllocator::allocInternal(const VkMemoryRequirements& result = allocBlockMemory(id, memInfo, block.mem); - if(result == VK_SUCCESS) - { + if (result == VK_SUCCESS) { nvvk::DebugUtil(m_device).setObjectName(block.mem, m_debugName); m_allocatedSize += block.allocationSize; @@ -617,49 +562,44 @@ AllocationID DeviceMemoryAllocator::allocInternal(const VkMemoryRequirements& uint32_t blockSize; uint32_t blockOffset; - block.range.subAllocate((uint32_t)memReqs.size, (uint32_t)memReqs.alignment, blockOffset, offset, blockSize); + block.range.subAllocate( + (uint32_t)memReqs.size, (uint32_t)memReqs.alignment, blockOffset, offset, blockSize); block.allocationCount = 1; - 
block.usedSize = blockSize; - block.mapCount = 0; - block.mapped = nullptr; - block.mappable = (memProps & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; + block.usedSize = blockSize; + block.mapCount = 0; + block.mapped = nullptr; + block.mappable = (memProps & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; Allocation allocation; - allocation.mem = block.mem; + allocation.mem = block.mem; allocation.offset = offset; - allocation.size = memReqs.size; + allocation.size = memReqs.size; m_usedSize += blockSize; m_activeBlockCount++; return createID(allocation, id, blockOffset, blockSize); - } - else - { + } else { // make block free m_freeBlockIndex = block.id.instantiate(m_freeBlockIndex); - if(result == VK_ERROR_OUT_OF_DEVICE_MEMORY - && ((memProps == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) || (memProps == 0 && preferDevice))) - { + if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY && + ((memProps == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) || (memProps == 0 && preferDevice))) { // downgrade memory property to zero and/or not preferDevice LOGW("downgrade memory\n"); return allocInternal(memReqs, 0, isLinear, dedicated, result, !preferDevice, state); - } - else - { + } else { LOGE("could not allocate memory: VkResult %d\n", result); return AllocationID(); } } } -void DeviceMemoryAllocator::free(AllocationID allocationID) -{ - const AllocationInfo& info = getInfo(allocationID); - Block& block = getBlock(info.block); +void DeviceMemoryAllocator::free(AllocationID allocationID) { + const AllocationInfo& info = getInfo(allocationID); + Block& block = getBlock(info.block); destroyID(allocationID); @@ -668,12 +608,11 @@ void DeviceMemoryAllocator::free(AllocationID allocationID) block.allocationCount--; block.usedSize -= info.blockSize; - if(block.allocationCount == 0 && !(block.isFirst && m_keepFirst)) - { + if (block.allocationCount == 0 && !(block.isFirst && m_keepFirst)) { assert(block.usedSize == 0); assert(!block.mapped); freeBlockMemory(info.block, block.mem); - block.mem = 
VK_NULL_HANDLE; + block.mem = VK_NULL_HANDLE; block.isFirst = false; m_allocatedSize -= block.allocationSize; @@ -684,84 +623,75 @@ void DeviceMemoryAllocator::free(AllocationID allocationID) } } -void* DeviceMemoryAllocator::map(AllocationID allocationID, VkResult *pResult) -{ - const AllocationInfo& info = getInfo(allocationID); - Block& block = getBlock(info.block); +void* DeviceMemoryAllocator::map(AllocationID allocationID, VkResult* pResult) { + const AllocationInfo& info = getInfo(allocationID); + Block& block = getBlock(info.block); assert(block.mappable); block.mapCount++; - if(!block.mapped) - { - VkResult result = vkMapMemory(m_device, block.mem, 0, block.allocationSize, 0, (void**)&block.mapped); - if (pResult) - { - *pResult = result; - } + if (!block.mapped) { + VkResult result = + vkMapMemory(m_device, block.mem, 0, block.allocationSize, 0, (void**)&block.mapped); + if (pResult) { *pResult = result; } } return block.mapped + info.allocation.offset; } -void DeviceMemoryAllocator::unmap(AllocationID allocationID) -{ - const AllocationInfo& info = getInfo(allocationID); - Block& block = getBlock(info.block); +void DeviceMemoryAllocator::unmap(AllocationID allocationID) { + const AllocationInfo& info = getInfo(allocationID); + Block& block = getBlock(info.block); assert(block.mapped); - if(--block.mapCount == 0) - { + if (--block.mapCount == 0) { block.mapped = nullptr; vkUnmapMemory(m_device, block.mem); } } VkImage DeviceMemoryAllocator::createImage(const VkImageCreateInfo& createInfo, - AllocationID& allocationID, - VkMemoryPropertyFlags memProps, - VkResult& result) -{ + AllocationID& allocationID, + VkMemoryPropertyFlags memProps, VkResult& result) { VkImage image; assert(createInfo.extent.width && createInfo.extent.height && createInfo.extent.depth); result = createImageInternal(m_device, &createInfo, &image); - if(result != VK_SUCCESS) - return VK_NULL_HANDLE; + if (result != VK_SUCCESS) return VK_NULL_HANDLE; - VkMemoryRequirements2 memReqs = 
{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; - VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; - VkImageMemoryRequirementsInfo2 imageReqs = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2}; + VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; + VkImageMemoryRequirementsInfo2 imageReqs = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2}; imageReqs.image = image; - memReqs.pNext = &dedicatedRegs; + memReqs.pNext = &dedicatedRegs; vkGetImageMemoryRequirements2(m_device, &imageReqs, &memReqs); VkBool32 useDedicated = m_forceDedicatedAllocation || dedicatedRegs.prefersDedicatedAllocation; VkMemoryDedicatedAllocateInfo dedicatedInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO}; - dedicatedInfo.image = image; + dedicatedInfo.image = image; - allocationID = alloc(memReqs.memoryRequirements, memProps, createInfo.tiling == VK_IMAGE_TILING_LINEAR, + allocationID = alloc(memReqs.memoryRequirements, + memProps, + createInfo.tiling == VK_IMAGE_TILING_LINEAR, useDedicated ? &dedicatedInfo : nullptr); Allocation allocation = allocationID.isValid() ? 
getAllocation(allocationID) : Allocation(); - if(allocation.mem == VK_NULL_HANDLE) - { + if (allocation.mem == VK_NULL_HANDLE) { vkDestroyImage(m_device, image, nullptr); result = VK_ERROR_OUT_OF_POOL_MEMORY; return VK_NULL_HANDLE; } VkBindImageMemoryInfo bindInfos = {VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO}; - bindInfos.image = image; - bindInfos.memory = allocation.mem; - bindInfos.memoryOffset = allocation.offset; + bindInfos.image = image; + bindInfos.memory = allocation.mem; + bindInfos.memoryOffset = allocation.offset; result = vkBindImageMemory2(m_device, 1, &bindInfos); - if(result != VK_SUCCESS) - { + if (result != VK_SUCCESS) { vkDestroyImage(m_device, image, nullptr); return VK_NULL_HANDLE; } @@ -769,52 +699,47 @@ VkImage DeviceMemoryAllocator::createImage(const VkImageCreateInfo& createInfo, return image; } VkBuffer DeviceMemoryAllocator::createBuffer(const VkBufferCreateInfo& createInfo, - AllocationID& allocationID, - VkMemoryPropertyFlags memProps, - VkResult& result) -{ + AllocationID& allocationID, + VkMemoryPropertyFlags memProps, VkResult& result) { VkBuffer buffer; assert(createInfo.size); result = createBufferInternal(m_device, &createInfo, &buffer); - if(result != VK_SUCCESS) - { - return VK_NULL_HANDLE; - } + if (result != VK_SUCCESS) { return VK_NULL_HANDLE; } - VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; - VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; - VkBufferMemoryRequirementsInfo2 bufferReqs = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2}; + VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; + VkBufferMemoryRequirementsInfo2 bufferReqs = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2}; bufferReqs.buffer = buffer; - memReqs.pNext = &dedicatedRegs; + memReqs.pNext = &dedicatedRegs; 
vkGetBufferMemoryRequirements2(m_device, &bufferReqs, &memReqs); // for buffers don't use "preferred", but only requires VkBool32 useDedicated = m_forceDedicatedAllocation || dedicatedRegs.requiresDedicatedAllocation; VkMemoryDedicatedAllocateInfo dedicatedInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO}; - dedicatedInfo.buffer = buffer; + dedicatedInfo.buffer = buffer; - allocationID = alloc(memReqs.memoryRequirements, memProps, true, useDedicated ? &dedicatedInfo : nullptr); + allocationID = + alloc(memReqs.memoryRequirements, memProps, true, useDedicated ? &dedicatedInfo : nullptr); Allocation allocation = allocationID.isValid() ? getAllocation(allocationID) : Allocation(); - if(allocation.mem == VK_NULL_HANDLE) - { + if (allocation.mem == VK_NULL_HANDLE) { vkDestroyBuffer(m_device, buffer, nullptr); result = VK_ERROR_OUT_OF_POOL_MEMORY; return VK_NULL_HANDLE; } VkBindBufferMemoryInfo bindInfos = {VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO}; - bindInfos.buffer = buffer; - bindInfos.memory = allocation.mem; - bindInfos.memoryOffset = allocation.offset; + bindInfos.buffer = buffer; + bindInfos.memory = allocation.mem; + bindInfos.memoryOffset = allocation.offset; result = vkBindBufferMemory2(m_device, 1, &bindInfos); - if(result != VK_SUCCESS) - { + if (result != VK_SUCCESS) { vkDestroyBuffer(m_device, buffer, nullptr); return VK_NULL_HANDLE; } @@ -822,58 +747,53 @@ VkBuffer DeviceMemoryAllocator::createBuffer(const VkBufferCreateInfo& createInf return buffer; } -VkBuffer DeviceMemoryAllocator::createBuffer(VkDeviceSize size, - VkBufferUsageFlags usage, - AllocationID& allocationID, - VkMemoryPropertyFlags memProps, - VkResult& result) -{ +VkBuffer DeviceMemoryAllocator::createBuffer(VkDeviceSize size, VkBufferUsageFlags usage, + AllocationID& allocationID, + VkMemoryPropertyFlags memProps, VkResult& result) { VkBufferCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; - createInfo.usage = usage | m_defaultBufferUsageFlags | 
VK_BUFFER_USAGE_TRANSFER_DST_BIT; - createInfo.size = size; + createInfo.usage = usage | m_defaultBufferUsageFlags | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + createInfo.size = size; return createBuffer(createInfo, allocationID, memProps, result); } #if VK_NV_ray_tracing -VkAccelerationStructureNV DeviceMemoryAllocator::createAccStructure(const VkAccelerationStructureCreateInfoNV& createInfo, - AllocationID& allocationID, - VkMemoryPropertyFlags memProps, - VkResult& result) -{ +VkAccelerationStructureNV DeviceMemoryAllocator::createAccStructure( + const VkAccelerationStructureCreateInfoNV& createInfo, AllocationID& allocationID, + VkMemoryPropertyFlags memProps, VkResult& result) { VkAccelerationStructureNV accel; result = vkCreateAccelerationStructureNV(m_device, &createInfo, nullptr, &accel); - if(result != VK_SUCCESS) - { - return VK_NULL_HANDLE; - } + if (result != VK_SUCCESS) { return VK_NULL_HANDLE; } - VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; - VkAccelerationStructureMemoryRequirementsInfoNV memInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV}; + VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + VkAccelerationStructureMemoryRequirementsInfoNV memInfo{ + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV}; memInfo.accelerationStructure = accel; vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memInfo, &memReqs); - allocationID = alloc(memReqs.memoryRequirements, memProps, true, m_forceDedicatedAllocation ? DEDICATED_PROXY : nullptr); + allocationID = alloc(memReqs.memoryRequirements, + memProps, + true, + m_forceDedicatedAllocation ? DEDICATED_PROXY : nullptr); Allocation allocation = allocationID.isValid() ? 
getAllocation(allocationID) : Allocation(); - if(allocation.mem == VK_NULL_HANDLE) - { + if (allocation.mem == VK_NULL_HANDLE) { vkDestroyAccelerationStructureNV(m_device, accel, nullptr); result = VK_ERROR_OUT_OF_POOL_MEMORY; return VK_NULL_HANDLE; } - VkBindAccelerationStructureMemoryInfoNV bind = {VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV}; - bind.accelerationStructure = accel; - bind.memory = allocation.mem; - bind.memoryOffset = allocation.offset; + VkBindAccelerationStructureMemoryInfoNV bind = { + VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV}; + bind.accelerationStructure = accel; + bind.memory = allocation.mem; + bind.memoryOffset = allocation.offset; assert(allocation.offset % memReqs.memoryRequirements.alignment == 0); result = vkBindAccelerationStructureMemoryNV(m_device, 1, &bind); - if(result != VK_SUCCESS) - { + if (result != VK_SUCCESS) { vkDestroyAccelerationStructureNV(m_device, accel, nullptr); free(allocationID); allocationID = AllocationID(); @@ -884,5 +804,4 @@ VkAccelerationStructureNV DeviceMemoryAllocator::createAccStructure(const VkAcce } #endif - } // namespace nvvk diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/memorymanagement_vk.hpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/memorymanagement_vk.hpp index 0242083..07b495b 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/memorymanagement_vk.hpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/memorymanagement_vk.hpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -227,7 +227,7 @@ class DeviceMemoryAllocator : public MemAllocator ////////////////////////////////////////////////////////////////////////// // Implement MemAllocator interface - virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult *pResult = nullptr) override; + virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, uint32_t plane = 0, VkResult *pResult = nullptr) override; virtual void freeMemory(MemHandle memHandle) override; virtual MemInfo getMemoryInfo(MemHandle memHandle) const override; virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult *pResult = nullptr) override; diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/resourceallocator_vk.cpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/resourceallocator_vk.cpp index 21d68a8..f05bf7a 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/resourceallocator_vk.cpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/resourceallocator_vk.cpp @@ -13,57 +13,55 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ #include "resourceallocator_vk.hpp" -#include "memallocator_dma_vk.hpp" -#include "memallocator_dedicated_vk.hpp" #include "error_vk.hpp" #include "images_vk.hpp" +#include "memallocator_dedicated_vk.hpp" +#include "memallocator_dma_vk.hpp" namespace nvvk { -ResourceAllocator::ResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, VkDeviceSize stagingBlockSize) -{ +ResourceAllocator::ResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, + MemAllocator* memAlloc, VkDeviceSize stagingBlockSize) { init(device, physicalDevice, memAlloc); } -ResourceAllocator::~ResourceAllocator() -{ +ResourceAllocator::~ResourceAllocator() { deinit(); } -void ResourceAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, VkDeviceSize stagingBlockSize) -{ - m_device = device; +void ResourceAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, + MemAllocator* memAlloc, VkDeviceSize stagingBlockSize) { + m_device = device; m_physicalDevice = physicalDevice; - m_memAlloc = memAlloc; + m_memAlloc = memAlloc; vkGetPhysicalDeviceMemoryProperties(physicalDevice, &m_memoryProperties); m_samplerPool.init(device); m_staging = std::make_unique(memAlloc, stagingBlockSize); } -void ResourceAllocator::deinit() -{ +void ResourceAllocator::deinit() { m_samplerPool.deinit(); m_staging.reset(); } -Buffer ResourceAllocator::createBuffer(const VkBufferCreateInfo& info_, const VkMemoryPropertyFlags memProperties_) -{ +Buffer ResourceAllocator::createBuffer(const VkBufferCreateInfo& info_, + const VkMemoryPropertyFlags memProperties_) { Buffer resultBuffer; // Create Buffer (can be overloaded) CreateBufferEx(info_, &resultBuffer.buffer); // Find memory requirements - VkMemoryRequirements2 memReqs{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; - VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; + VkMemoryRequirements2 
memReqs{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; VkBufferMemoryRequirementsInfo2 bufferReqs{VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2}; - memReqs.pNext = &dedicatedRegs; + memReqs.pNext = &dedicatedRegs; bufferReqs.buffer = resultBuffer.buffer; vkGetBufferMemoryRequirements2(m_device, &bufferReqs, &memReqs); @@ -71,132 +69,168 @@ Buffer ResourceAllocator::createBuffer(const VkBufferCreateInfo& info_, const Vk // Build up allocation info MemAllocateInfo allocInfo(memReqs.memoryRequirements, memProperties_, false); - if(info_.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) - { + if (info_.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { allocInfo.setAllocationFlags(VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT); } - if(dedicatedRegs.requiresDedicatedAllocation) - { + if (dedicatedRegs.requiresDedicatedAllocation) { allocInfo.setDedicatedBuffer(resultBuffer.buffer); } // Allocate memory resultBuffer.memHandle = AllocateMemory(allocInfo); - if (resultBuffer.memHandle) - { + if (resultBuffer.memHandle) { const auto memInfo = m_memAlloc->getMemoryInfo(resultBuffer.memHandle); // Bind memory to buffer NVVK_CHECK(vkBindBufferMemory(m_device, resultBuffer.buffer, memInfo.memory, memInfo.offset)); - } - else - { + } else { destroy(resultBuffer); } return resultBuffer; } -Buffer ResourceAllocator::createBuffer(VkDeviceSize size_, VkBufferUsageFlags usage_, const VkMemoryPropertyFlags memUsage_) -{ +Buffer ResourceAllocator::createBuffer(VkDeviceSize size_, VkBufferUsageFlags usage_, + const VkMemoryPropertyFlags memUsage_) { VkBufferCreateInfo info{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; - info.size = size_; + info.size = size_; info.usage = usage_ | VK_BUFFER_USAGE_TRANSFER_DST_BIT; return createBuffer(info, memUsage_); } -Buffer ResourceAllocator::createBuffer(const VkCommandBuffer& cmdBuf, - const VkDeviceSize& size_, - const void* data_, - VkBufferUsageFlags usage_, 
- VkMemoryPropertyFlags memProps) -{ +Buffer ResourceAllocator::createBuffer(const VkCommandBuffer& cmdBuf, const VkDeviceSize& size_, + const void* data_, VkBufferUsageFlags usage_, + VkMemoryPropertyFlags memProps) { VkBufferCreateInfo createInfoR{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; - createInfoR.size = size_; - createInfoR.usage = usage_ | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + createInfoR.size = size_; + createInfoR.usage = usage_ | VK_BUFFER_USAGE_TRANSFER_DST_BIT; Buffer resultBuffer = createBuffer(createInfoR, memProps); - if(data_) - { - m_staging->cmdToBuffer(cmdBuf, resultBuffer.buffer, 0, size_, data_); - } + if (data_) { m_staging->cmdToBuffer(cmdBuf, resultBuffer.buffer, 0, size_, data_); } return resultBuffer; } -Image ResourceAllocator::createImage(const VkImageCreateInfo& info_, const VkMemoryPropertyFlags memUsage_) -{ +Image ResourceAllocator::createImage(const VkImageCreateInfo& info_, + const VkMemoryPropertyFlags memUsage_) { Image resultImage; // Create image CreateImageEx(info_, &resultImage.image); + uint32_t planes = 1; + switch (info_.format) { + case VkFormat::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM: + case VkFormat::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM: + case VkFormat::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM: + case VkFormat::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM: + case VkFormat::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM: + case VkFormat::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM: + planes = 2; + break; + case VkFormat::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM: + case VkFormat::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM: + case 
VkFormat::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM: + case VkFormat::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16: + case VkFormat::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM: + case VkFormat::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM: + case VkFormat::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM: + planes = 3; + break; + } + // Find memory requirements - VkMemoryRequirements2 memReqs{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; - VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; - VkImageMemoryRequirementsInfo2 imageReqs{VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2}; + VkMemoryRequirements2 memReqs{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; + VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS}; + memReqs.pNext = &dedicatedRegs; + VkImageMemoryRequirementsInfo2 imageReqs{VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2}; + VkImagePlaneMemoryRequirementsInfo imagePlaneMemoryReqs{ + VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO}; imageReqs.image = resultImage.image; - memReqs.pNext = &dedicatedRegs; + if (planes > 1) { imageReqs.pNext = &imagePlaneMemoryReqs; } - vkGetImageMemoryRequirements2(m_device, &imageReqs, &memReqs); + std::vector memoryRequirements; - // Build up allocation info - MemAllocateInfo allocInfo(memReqs.memoryRequirements, memUsage_, true); - if(dedicatedRegs.requiresDedicatedAllocation) - { - allocInfo.setDedicatedImage(resultImage.image); - } + for (uint32_t plane = 0; plane < planes; ++plane) { + imagePlaneMemoryReqs.planeAspect = + VkImageAspectFlagBits(VkImageAspectFlagBits::VK_IMAGE_ASPECT_PLANE_0_BIT 
<< plane); + vkGetImageMemoryRequirements2(m_device, &imageReqs, &memReqs); - // Allocate memory - resultImage.memHandle = AllocateMemory(allocInfo); - if(resultImage.memHandle) - { - const auto memInfo = m_memAlloc->getMemoryInfo(resultImage.memHandle); - // Bind memory to image - NVVK_CHECK(vkBindImageMemory(m_device, resultImage.image, memInfo.memory, memInfo.offset)); + memoryRequirements.push_back(memReqs.memoryRequirements); } - else - { - destroy(resultImage); + + // Build up allocation info + MemAllocateInfo allocInfo(memoryRequirements, memUsage_, true); + if (dedicatedRegs.requiresDedicatedAllocation) { allocInfo.setDedicatedImage(resultImage.image); } + + // Allocate and bind memory + resultImage.memHandles.resize(planes); + std::vector bindImageMemoryInfos(planes); + for (uint32_t plane = 0; plane < planes; ++plane) { + resultImage.memHandles[plane] = AllocateMemory(allocInfo, plane); + if (resultImage.memHandles[plane]) { + const auto memInfo = m_memAlloc->getMemoryInfo(resultImage.memHandles[plane]); + bindImageMemoryInfos[plane] = VkBindImageMemoryInfo{VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO}; + bindImageMemoryInfos[plane].image = resultImage.image; + bindImageMemoryInfos[plane].memory = memInfo.memory; + bindImageMemoryInfos[plane].memoryOffset = memInfo.offset; + } else { + destroy(resultImage); + return resultImage; + } } + + // Bind memory to image + NVVK_CHECK( + vkBindImageMemory2(m_device, bindImageMemoryInfos.size(), bindImageMemoryInfos.data())); + return resultImage; } -Image ResourceAllocator::createImage(const VkCommandBuffer& cmdBuf, - size_t size_, - const void* data_, - const VkImageCreateInfo& info_, - const VkImageLayout& layout_) -{ +Image ResourceAllocator::createImage(const VkCommandBuffer& cmdBuf, size_t size_, const void* data_, + const VkImageCreateInfo& info_, const VkImageLayout& layout_) { Image resultImage = createImage(info_, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); // Copy the data to staging buffer than to image - if(data_ 
!= nullptr) - { + if (data_ != nullptr) { // Copy buffer to image VkImageSubresourceRange subresourceRange{}; - subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; subresourceRange.baseArrayLayer = 0; - subresourceRange.baseMipLevel = 0; - subresourceRange.layerCount = 1; - subresourceRange.levelCount = info_.mipLevels; + subresourceRange.baseMipLevel = 0; + subresourceRange.layerCount = 1; + subresourceRange.levelCount = info_.mipLevels; // doing these transitions per copy is not efficient, should do in bulk for many images - nvvk::cmdBarrierImageLayout(cmdBuf, resultImage.image, VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresourceRange); + nvvk::cmdBarrierImageLayout(cmdBuf, + resultImage.image, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + subresourceRange); - VkOffset3D offset = {0}; + VkOffset3D offset = {0}; VkImageSubresourceLayers subresource = {0}; - subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - subresource.layerCount = 1; + subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + subresource.layerCount = 1; - m_staging->cmdToImage(cmdBuf, resultImage.image, offset, info_.extent, subresource, size_, data_); + m_staging->cmdToImage( + cmdBuf, resultImage.image, offset, info_.extent, subresource, size_, data_); // Setting final image layout - nvvk::cmdBarrierImageLayout(cmdBuf, resultImage.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, layout_); - } - else - { + nvvk::cmdBarrierImageLayout( + cmdBuf, resultImage.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, layout_); + } else { // Setting final image layout nvvk::cmdBarrierImageLayout(cmdBuf, resultImage.image, VK_IMAGE_LAYOUT_UNDEFINED, layout_); } @@ -204,57 +238,53 @@ Image ResourceAllocator::createImage(const VkCommandBuffer& cmdBuf, return resultImage; } -nvvk::Texture ResourceAllocator::createTexture(const Image& image, +nvvk::Texture ResourceAllocator::createTexture(const Image& image, 
const VkImageViewCreateInfo& imageViewCreateInfo, - const VkSamplerCreateInfo& samplerCreateInfo) -{ - Texture resultTexture = createTexture(image, imageViewCreateInfo); + const VkSamplerCreateInfo& samplerCreateInfo) { + Texture resultTexture = createTexture(image, imageViewCreateInfo); resultTexture.descriptor.sampler = m_samplerPool.acquireSampler(samplerCreateInfo); return resultTexture; } - -Texture ResourceAllocator::createTexture(const Image& image, const VkImageViewCreateInfo& imageViewCreateInfo) -{ +Texture ResourceAllocator::createTexture(const Image& image, + const VkImageViewCreateInfo& imageViewCreateInfo) { Texture resultTexture; - resultTexture.image = image.image; - resultTexture.memHandle = image.memHandle; + resultTexture.image = image.image; + resultTexture.memHandles = image.memHandles; resultTexture.descriptor.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; assert(imageViewCreateInfo.image == image.image); - NVVK_CHECK(vkCreateImageView(m_device, &imageViewCreateInfo, nullptr, &resultTexture.descriptor.imageView)); + NVVK_CHECK(vkCreateImageView( + m_device, &imageViewCreateInfo, nullptr, &resultTexture.descriptor.imageView)); return resultTexture; } -Texture ResourceAllocator::createTexture(const VkCommandBuffer& cmdBuf, - size_t size_, - const void* data_, - const VkImageCreateInfo& info_, +Texture ResourceAllocator::createTexture(const VkCommandBuffer& cmdBuf, size_t size_, + const void* data_, const VkImageCreateInfo& info_, const VkSamplerCreateInfo& samplerCreateInfo, - const VkImageLayout& layout_, - bool isCube) -{ + const VkImageLayout& layout_, bool isCube) { Image image = createImage(cmdBuf, size_, data_, info_, layout_); VkImageViewCreateInfo viewInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO}; - viewInfo.pNext = nullptr; - viewInfo.image = image.image; - viewInfo.format = info_.format; - viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - viewInfo.subresourceRange.baseMipLevel = 0; - 
viewInfo.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + viewInfo.pNext = nullptr; + viewInfo.image = image.image; + viewInfo.format = info_.format; + viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; viewInfo.subresourceRange.baseArrayLayer = 0; - viewInfo.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; - switch(info_.imageType) - { + viewInfo.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + switch (info_.imageType) { case VK_IMAGE_TYPE_1D: - viewInfo.viewType = (info_.arrayLayers > 1 ? VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D); + viewInfo.viewType = + (info_.arrayLayers > 1 ? VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D); break; case VK_IMAGE_TYPE_2D: - viewInfo.viewType = isCube ? VK_IMAGE_VIEW_TYPE_CUBE : - (info_.arrayLayers > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D); + viewInfo.viewType = + isCube ? VK_IMAGE_VIEW_TYPE_CUBE + : (info_.arrayLayers > 1 ? 
VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D); break; case VK_IMAGE_TYPE_3D: viewInfo.viewType = VK_IMAGE_VIEW_TYPE_3D; @@ -263,110 +293,91 @@ Texture ResourceAllocator::createTexture(const VkCommandBuffer& cmdBuf, assert(0); } - Texture resultTexture = createTexture(image, viewInfo, samplerCreateInfo); + Texture resultTexture = createTexture(image, viewInfo, samplerCreateInfo); resultTexture.descriptor.imageLayout = layout_; return resultTexture; } -void ResourceAllocator::finalizeStaging(VkFence fence /*= VK_NULL_HANDLE*/) -{ +void ResourceAllocator::finalizeStaging(VkFence fence /*= VK_NULL_HANDLE*/) { m_staging->finalizeResources(fence); } -void ResourceAllocator::releaseStaging() -{ +void ResourceAllocator::releaseStaging() { m_staging->releaseResources(); } -void ResourceAllocator::finalizeAndReleaseStaging(VkFence fence /*= VK_NULL_HANDLE*/) -{ +void ResourceAllocator::finalizeAndReleaseStaging(VkFence fence /*= VK_NULL_HANDLE*/) { m_staging->finalizeResources(fence); m_staging->releaseResources(); } -nvvk::StagingMemoryManager* ResourceAllocator::getStaging() -{ +nvvk::StagingMemoryManager* ResourceAllocator::getStaging() { return m_staging.get(); } -const nvvk::StagingMemoryManager* ResourceAllocator::getStaging() const -{ +const nvvk::StagingMemoryManager* ResourceAllocator::getStaging() const { return m_staging.get(); } -void ResourceAllocator::destroy(Buffer& b_) -{ +void ResourceAllocator::destroy(Buffer& b_) { vkDestroyBuffer(m_device, b_.buffer, nullptr); m_memAlloc->freeMemory(b_.memHandle); b_ = Buffer(); } -void ResourceAllocator::destroy(Image& i_) -{ +void ResourceAllocator::destroy(Image& i_) { vkDestroyImage(m_device, i_.image, nullptr); - m_memAlloc->freeMemory(i_.memHandle); + for (auto&& memHandle : i_.memHandles) { m_memAlloc->freeMemory(memHandle); } i_ = Image(); } -void ResourceAllocator::destroy(Texture& t_) -{ +void ResourceAllocator::destroy(Texture& t_) { vkDestroyImageView(m_device, t_.descriptor.imageView, nullptr); 
vkDestroyImage(m_device, t_.image, nullptr); - m_memAlloc->freeMemory(t_.memHandle); + for (auto&& memHandle : t_.memHandles) { m_memAlloc->freeMemory(memHandle); } - if(t_.descriptor.sampler) - { - m_samplerPool.releaseSampler(t_.descriptor.sampler); - } + if (t_.descriptor.sampler) { m_samplerPool.releaseSampler(t_.descriptor.sampler); } t_ = Texture(); } -void* ResourceAllocator::map(const Buffer& buffer) -{ +void* ResourceAllocator::map(const Buffer& buffer) { void* pData = m_memAlloc->map(buffer.memHandle); return pData; } -void ResourceAllocator::unmap(const Buffer& buffer) -{ +void ResourceAllocator::unmap(const Buffer& buffer) { m_memAlloc->unmap(buffer.memHandle); } -void* ResourceAllocator::map(const Image& buffer) -{ - void* pData = m_memAlloc->map(buffer.memHandle); +void* ResourceAllocator::map(const Image& buffer, uint32_t plane) { + void* pData = m_memAlloc->map(buffer.memHandles[plane]); return pData; } -void ResourceAllocator::unmap(const Image& image) -{ - m_memAlloc->unmap(image.memHandle); +void ResourceAllocator::unmap(const Image& image, uint32_t plane) { + m_memAlloc->unmap(image.memHandles[plane]); } -MemHandle ResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo) -{ - return m_memAlloc->allocMemory(allocateInfo); +MemHandle ResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo, uint32_t plane) { + return m_memAlloc->allocMemory(allocateInfo, plane); } -void ResourceAllocator::CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer) -{ +void ResourceAllocator::CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer) { NVVK_CHECK(vkCreateBuffer(m_device, &info_, nullptr, buffer)); } -void ResourceAllocator::CreateImageEx(const VkImageCreateInfo& info_, VkImage* image) -{ +void ResourceAllocator::CreateImageEx(const VkImageCreateInfo& info_, VkImage* image) { NVVK_CHECK(vkCreateImage(m_device, &info_, nullptr, image)); } -uint32_t ResourceAllocator::getMemoryType(uint32_t typeBits, const 
VkMemoryPropertyFlags& properties) -{ - for(uint32_t i = 0; i < m_memoryProperties.memoryTypeCount; i++) - { - if(((typeBits & (1 << i)) > 0) && (m_memoryProperties.memoryTypes[i].propertyFlags & properties) == properties) - { +uint32_t ResourceAllocator::getMemoryType(uint32_t typeBits, + const VkMemoryPropertyFlags& properties) { + for (uint32_t i = 0; i < m_memoryProperties.memoryTypeCount; i++) { + if (((typeBits & (1 << i)) > 0) && + (m_memoryProperties.memoryTypes[i].propertyFlags & properties) == properties) { return i; } } @@ -374,15 +385,14 @@ uint32_t ResourceAllocator::getMemoryType(uint32_t typeBits, const VkMemoryPrope return ~0u; } - -AccelNV ResourceAllocator::createAcceleration(VkAccelerationStructureCreateInfoNV& accel_) -{ +AccelNV ResourceAllocator::createAcceleration(VkAccelerationStructureCreateInfoNV& accel_) { AccelNV resultAccel; // Create the acceleration structure NVVK_CHECK(vkCreateAccelerationStructureNV(m_device, &accel_, nullptr, &resultAccel.accel)); // Find memory requirements - VkAccelerationStructureMemoryRequirementsInfoNV accelMemInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV}; + VkAccelerationStructureMemoryRequirementsInfoNV accelMemInfo{ + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV}; accelMemInfo.accelerationStructure = resultAccel.accel; VkMemoryRequirements2 memReqs{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2}; vkGetAccelerationStructureMemoryRequirementsNV(m_device, &accelMemInfo, &memReqs); @@ -390,38 +400,35 @@ AccelNV ResourceAllocator::createAcceleration(VkAccelerationStructureCreateInfoN // Allocate memory MemAllocateInfo info(memReqs.memoryRequirements, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, false); resultAccel.memHandle = AllocateMemory(info); - if(resultAccel.memHandle) - { + if (resultAccel.memHandle) { const auto memInfo = m_memAlloc->getMemoryInfo(resultAccel.memHandle); // Bind memory with acceleration structure - VkBindAccelerationStructureMemoryInfoNV 
bind{VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV}; + VkBindAccelerationStructureMemoryInfoNV bind{ + VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV}; bind.accelerationStructure = resultAccel.accel; - bind.memory = memInfo.memory; - bind.memoryOffset = memInfo.offset; + bind.memory = memInfo.memory; + bind.memoryOffset = memInfo.offset; NVVK_CHECK(vkBindAccelerationStructureMemoryNV(m_device, 1, &bind)); - } - else - { + } else { destroy(resultAccel); } return resultAccel; } -void ResourceAllocator::destroy(AccelNV& a_) -{ +void ResourceAllocator::destroy(AccelNV& a_) { vkDestroyAccelerationStructureNV(m_device, a_.accel, nullptr); m_memAlloc->freeMemory(a_.memHandle); a_ = AccelNV(); } -AccelKHR ResourceAllocator::createAcceleration(VkAccelerationStructureCreateInfoKHR& accel_) -{ +AccelKHR ResourceAllocator::createAcceleration(VkAccelerationStructureCreateInfoKHR& accel_) { AccelKHR resultAccel; // Allocating the buffer to hold the acceleration structure - resultAccel.buffer = createBuffer(accel_.size, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR - | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT); + resultAccel.buffer = createBuffer(accel_.size, + VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT); // Setting the buffer accel_.buffer = resultAccel.buffer.buffer; // Create the acceleration structure @@ -430,21 +437,18 @@ AccelKHR ResourceAllocator::createAcceleration(VkAccelerationStructureCreateInfo return resultAccel; } -void ResourceAllocator::destroy(AccelKHR& a_) -{ +void ResourceAllocator::destroy(AccelKHR& a_) { vkDestroyAccelerationStructureKHR(m_device, a_.accel, nullptr); destroy(a_.buffer); a_ = AccelKHR(); } -VkSampler ResourceAllocator::acquireSampler(const VkSamplerCreateInfo& info) -{ +VkSampler ResourceAllocator::acquireSampler(const VkSamplerCreateInfo& info) { return m_samplerPool.acquireSampler(info); } -void ResourceAllocator::releaseSampler(VkSampler 
sampler) -{ +void ResourceAllocator::releaseSampler(VkSampler sampler) { m_samplerPool.releaseSampler(sampler); } @@ -452,14 +456,13 @@ void ResourceAllocator::releaseSampler(VkSampler sampler) /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -ExportResourceAllocator::ExportResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAllocator, VkDeviceSize stagingBlockSize) - : ResourceAllocator(device, physicalDevice, memAllocator, stagingBlockSize) -{ -} +ExportResourceAllocator::ExportResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, + MemAllocator* memAllocator, + VkDeviceSize stagingBlockSize) + : ResourceAllocator(device, physicalDevice, memAllocator, stagingBlockSize) {} -void ExportResourceAllocator::CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer) -{ - VkBufferCreateInfo info = info_; +void ExportResourceAllocator::CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer) { + VkBufferCreateInfo info = info_; VkExternalMemoryBufferCreateInfo infoEx{VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO}; #ifdef WIN32 infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; @@ -470,9 +473,8 @@ void ExportResourceAllocator::CreateBufferEx(const VkBufferCreateInfo& info_, Vk NVVK_CHECK(vkCreateBuffer(m_device, &info, nullptr, buffer)); } -void ExportResourceAllocator::CreateImageEx(const VkImageCreateInfo& info_, VkImage* image) -{ - auto info = info_; +void ExportResourceAllocator::CreateImageEx(const VkImageCreateInfo& info_, VkImage* image) { + auto info = info_; VkExternalMemoryImageCreateInfo infoEx{VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO}; #ifdef WIN32 infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; @@ -483,39 +485,35 @@ void 
ExportResourceAllocator::CreateImageEx(const VkImageCreateInfo& info_, VkIm NVVK_CHECK(vkCreateImage(m_device, &info, nullptr, image)); } -MemHandle ExportResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo) -{ +MemHandle ExportResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo, + uint32_t plane) { MemAllocateInfo exportAllocateInfo(allocateInfo); exportAllocateInfo.setExportable(true); - return ResourceAllocator::AllocateMemory(exportAllocateInfo); + return ResourceAllocator::AllocateMemory(exportAllocateInfo, plane); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -ExportResourceAllocatorDedicated::ExportResourceAllocatorDedicated(VkDevice device, - VkPhysicalDevice physicalDevice, - VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/) -{ +ExportResourceAllocatorDedicated::ExportResourceAllocatorDedicated( + VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/) { init(device, physicalDevice, stagingBlockSize); } -ExportResourceAllocatorDedicated::~ExportResourceAllocatorDedicated() -{ +ExportResourceAllocatorDedicated::~ExportResourceAllocatorDedicated() { deinit(); } - -void ExportResourceAllocatorDedicated::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/) -{ +void ExportResourceAllocatorDedicated::init( + VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/) { m_memAlloc = std::make_unique(device, physicalDevice); ExportResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), 
stagingBlockSize); } -void ExportResourceAllocatorDedicated::deinit() -{ +void ExportResourceAllocatorDedicated::deinit() { ExportResourceAllocator::deinit(); m_memAlloc.reset(); } @@ -524,56 +522,50 @@ void ExportResourceAllocatorDedicated::deinit() /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -ExplicitDeviceMaskResourceAllocator::ExplicitDeviceMaskResourceAllocator(VkDevice device, - VkPhysicalDevice physicalDevice, - MemAllocator* memAlloc, - uint32_t deviceMask) -{ +ExplicitDeviceMaskResourceAllocator::ExplicitDeviceMaskResourceAllocator( + VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, uint32_t deviceMask) { init(device, physicalDevice, memAlloc, deviceMask); } -void ExplicitDeviceMaskResourceAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, uint32_t deviceMask) -{ +void ExplicitDeviceMaskResourceAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, + MemAllocator* memAlloc, uint32_t deviceMask) { ResourceAllocator::init(device, physicalDevice, memAlloc); m_deviceMask = deviceMask; } -MemHandle ExplicitDeviceMaskResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo) -{ +MemHandle ExplicitDeviceMaskResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo, + uint32_t plane) { MemAllocateInfo deviceMaskAllocateInfo(allocateInfo); deviceMaskAllocateInfo.setDeviceMask(m_deviceMask); - return ResourceAllocator::AllocateMemory(deviceMaskAllocateInfo); + return ResourceAllocator::AllocateMemory(deviceMaskAllocateInfo, plane); } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -ResourceAllocatorDma::ResourceAllocatorDma(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize) -{ +ResourceAllocatorDma::ResourceAllocatorDma(VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize, + VkDeviceSize memBlockSize) { init(device, physicalDevice, stagingBlockSize, memBlockSize); } -ResourceAllocatorDma::~ResourceAllocatorDma() -{ +ResourceAllocatorDma::~ResourceAllocatorDma() { deinit(); } -void ResourceAllocatorDma::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize) -{ - m_dma = std::make_unique(device, physicalDevice, memBlockSize); +void ResourceAllocatorDma::init(VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize) { + m_dma = std::make_unique(device, physicalDevice, memBlockSize); ResourceAllocator::init(device, physicalDevice, m_dma.get(), stagingBlockSize); } - -void ResourceAllocatorDma::init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize) -{ +void ResourceAllocatorDma::init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize) { init(device, physicalDevice, stagingBlockSize, memBlockSize); } -void ResourceAllocatorDma::deinit() -{ +void ResourceAllocatorDma::deinit() { ResourceAllocator::deinit(); m_dma.reset(); } @@ -581,34 +573,30 @@ void ResourceAllocatorDma::deinit() /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -ResourceAllocatorDedicated::ResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize) -{ +ResourceAllocatorDedicated::ResourceAllocatorDedicated(VkDevice device, + VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize) { init(device, physicalDevice, stagingBlockSize); } - -ResourceAllocatorDedicated::~ResourceAllocatorDedicated() -{ +ResourceAllocatorDedicated::~ResourceAllocatorDedicated() { deinit(); } -void ResourceAllocatorDedicated::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize) -{ +void ResourceAllocatorDedicated::init(VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize) { m_memAlloc = std::make_unique(device, physicalDevice); ResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), stagingBlockSize); } - -void ResourceAllocatorDedicated::init(VkInstance, // unused - VkDevice device, - VkPhysicalDevice physicalDevice, - VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/) -{ +void ResourceAllocatorDedicated::init( + VkInstance, // unused + VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/) { init(device, physicalDevice, stagingBlockSize); } -void ResourceAllocatorDedicated::deinit() -{ +void ResourceAllocatorDedicated::deinit() { ResourceAllocator::deinit(); m_memAlloc.reset(); } diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/resourceallocator_vk.hpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/resourceallocator_vk.hpp index 78a7112..e7d5ef7 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/resourceallocator_vk.hpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/resourceallocator_vk.hpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
* - * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2019-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ @@ -22,9 +22,9 @@ #include #ifdef VULKAN_HPP -#include #include #include +#include #endif #include @@ -34,130 +34,127 @@ #include "samplers_vk.hpp" #include "stagingmemorymanager_vk.hpp" - - /** - \class nvvk::ResourceAllocator - - The goal of nvvk::ResourceAllocator is to aid creation of typical Vulkan - resources (VkBuffer, VkImage and VkAccelerationStructure). - All memory is allocated using the provided [nvvk::MemAllocator](#class-nvvkmemallocator) - and bound to the appropriate resources. The allocator contains a - [nvvk::StagingMemoryManager](#class-nvvkstagingmemorymanager) and - [nvvk::SamplerPool](#class-nvvksamplerpool) to aid this process. - - ResourceAllocator separates object creation and memory allocation by delegating allocation - of memory to an object of interface type 'nvvk::MemAllocator'. - This way the ResourceAllocator can be used with different memory allocation strategies, depending on needs. - nvvk provides three implementations of MemAllocator: - * nvvk::DedicatedMemoryAllocator is using a very simple allocation scheme, one VkDeviceMemory object per allocation. - This strategy is only useful for very simple applications due to the overhead of vkAllocateMemory and - an implementation dependent bounded number of vkDeviceMemory allocations possible. 
- * nvvk::DMAMemoryAllocator delegates memory requests to a 'nvvk:DeviceMemoryAllocator', - as an example implementation of a suballocator - * nvvk::VMAMemoryAllocator delegates memory requests to a [Vulkan Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) - - Utility wrapper structs contain the appropriate Vulkan resource and the - appropriate nvvk::MemHandle : - - - nvvk::Buffer - - nvvk::Image - - nvvk::Texture contains VkImage and VkImageView as well as an - optional VkSampler stored within VkDescriptorImageInfo - - nvvk::AccelNV - - nvvk::AccelKHR - - nvvk::Buffer, nvvk::Image, nvvk::Texture and nvvk::AccelKHR nvvk::AccelNV objects can be copied - by value. They do not track lifetime of the underlying Vulkan objects and memory allocations. - The corresponding destroy() functions of nvvk::ResourceAllocator destroy created objects and - free up their memory. ResourceAllocator does not track usage of objects either. Thus, one has to - make sure that objects are no longer in use by the GPU when they get destroyed. - - > Note: These classes are foremost to showcase principle components that - > a Vulkan engine would most likely have. - > They are geared towards ease of use in this sample framework, and - > not optimized nor meant for production code. - - \code{.cpp} - nvvk::DeviceMemoryAllocator memAllocator; - nvvk::ResourceAllocator resAllocator; - - memAllocator.init(device, physicalDevice); - resAllocator.init(device, physicalDevice, &memAllocator); - - ... - - VkCommandBuffer cmd = ... 
transfer queue command buffer - - // creates new resources and - // implicitly triggers staging transfer copy operations into cmd - nvvk::Buffer vbo = resAllocator.createBuffer(cmd, vboSize, vboData, vboUsage); - nvvk::Buffer ibo = resAllocator.createBuffer(cmd, iboSize, iboData, iboUsage); - - // use functions from staging memory manager - // here we associate the temporary staging resources with a fence - resAllocator.finalizeStaging( fence ); - - // submit cmd buffer with staging copy operations - vkQueueSubmit(... cmd ... fence ...) - - ... - - // if you do async uploads you would - // trigger garbage collection somewhere per frame - resAllocator.releaseStaging(); - - \endcode - - Separation of memory allocation and resource creation is very flexible, but it - can be tedious to set up for simple usecases. nvvk offers three helper ResourceAllocator - derived classes which internally contain the MemAllocator object and manage its lifetime: - * [ResourceAllocatorDedicated](#class nvvk::ResourceAllocatorDedicated) - * [ResourceAllocatorDma](#class nvvk::ResourceAllocatorDma) - * [ResourceAllocatorVma](#cass nvvk::ResourceAllocatorVma) - - In these cases, only one object needs to be created and initialized. - - ResourceAllocator can also be subclassed to specialize some of its functionality. - Examples are [ExportResourceAllocator](#class ExportResourceAllocator) and [ExplicitDeviceMaskResourceAllocator](#class ExplicitDeviceMaskResourceAllocator). - ExportResourceAllocator injects itself into the object allocation process such that - the resulting allocations can be exported or created objects may be bound to exported - memory - ExplicitDeviceMaskResourceAllocator overrides the devicemask of allocations such that - objects can be created on a specific device in a device group. - */ +/** +\class nvvk::ResourceAllocator + +The goal of nvvk::ResourceAllocator is to aid creation of typical Vulkan +resources (VkBuffer, VkImage and VkAccelerationStructure). 
+All memory is allocated using the provided [nvvk::MemAllocator](#class-nvvkmemallocator) +and bound to the appropriate resources. The allocator contains a +[nvvk::StagingMemoryManager](#class-nvvkstagingmemorymanager) and +[nvvk::SamplerPool](#class-nvvksamplerpool) to aid this process. + +ResourceAllocator separates object creation and memory allocation by delegating allocation +of memory to an object of interface type 'nvvk::MemAllocator'. +This way the ResourceAllocator can be used with different memory allocation strategies, depending on +needs. nvvk provides three implementations of MemAllocator: +* nvvk::DedicatedMemoryAllocator is using a very simple allocation scheme, one VkDeviceMemory object +per allocation. This strategy is only useful for very simple applications due to the overhead of +vkAllocateMemory and an implementation dependent bounded number of vkDeviceMemory allocations +possible. +* nvvk::DMAMemoryAllocator delegates memory requests to a 'nvvk:DeviceMemoryAllocator', + as an example implementation of a suballocator +* nvvk::VMAMemoryAllocator delegates memory requests to a [Vulkan Memory +Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) + +Utility wrapper structs contain the appropriate Vulkan resource and the +appropriate nvvk::MemHandle : + +- nvvk::Buffer +- nvvk::Image +- nvvk::Texture contains VkImage and VkImageView as well as an + optional VkSampler stored within VkDescriptorImageInfo +- nvvk::AccelNV +- nvvk::AccelKHR + +nvvk::Buffer, nvvk::Image, nvvk::Texture and nvvk::AccelKHR nvvk::AccelNV objects can be copied +by value. They do not track lifetime of the underlying Vulkan objects and memory allocations. +The corresponding destroy() functions of nvvk::ResourceAllocator destroy created objects and +free up their memory. ResourceAllocator does not track usage of objects either. Thus, one has to +make sure that objects are no longer in use by the GPU when they get destroyed. 
+ +> Note: These classes are foremost to showcase principle components that +> a Vulkan engine would most likely have. +> They are geared towards ease of use in this sample framework, and +> not optimized nor meant for production code. + +\code{.cpp} +nvvk::DeviceMemoryAllocator memAllocator; +nvvk::ResourceAllocator resAllocator; + +memAllocator.init(device, physicalDevice); +resAllocator.init(device, physicalDevice, &memAllocator); + +... + +VkCommandBuffer cmd = ... transfer queue command buffer + +// creates new resources and +// implicitly triggers staging transfer copy operations into cmd +nvvk::Buffer vbo = resAllocator.createBuffer(cmd, vboSize, vboData, vboUsage); +nvvk::Buffer ibo = resAllocator.createBuffer(cmd, iboSize, iboData, iboUsage); + +// use functions from staging memory manager +// here we associate the temporary staging resources with a fence +resAllocator.finalizeStaging( fence ); + +// submit cmd buffer with staging copy operations +vkQueueSubmit(... cmd ... fence ...) + +... + +// if you do async uploads you would +// trigger garbage collection somewhere per frame +resAllocator.releaseStaging(); + +\endcode + +Separation of memory allocation and resource creation is very flexible, but it +can be tedious to set up for simple usecases. nvvk offers three helper ResourceAllocator +derived classes which internally contain the MemAllocator object and manage its lifetime: +* [ResourceAllocatorDedicated](#class nvvk::ResourceAllocatorDedicated) +* [ResourceAllocatorDma](#class nvvk::ResourceAllocatorDma) +* [ResourceAllocatorVma](#cass nvvk::ResourceAllocatorVma) + +In these cases, only one object needs to be created and initialized. + +ResourceAllocator can also be subclassed to specialize some of its functionality. +Examples are [ExportResourceAllocator](#class ExportResourceAllocator) and +[ExplicitDeviceMaskResourceAllocator](#class ExplicitDeviceMaskResourceAllocator). 
+ExportResourceAllocator injects itself into the object allocation process such that +the resulting allocations can be exported or created objects may be bound to exported +memory +ExplicitDeviceMaskResourceAllocator overrides the devicemask of allocations such that +objects can be created on a specific device in a device group. +*/ namespace nvvk { // Objects -struct Buffer -{ - VkBuffer buffer = VK_NULL_HANDLE; +struct Buffer { + VkBuffer buffer = VK_NULL_HANDLE; MemHandle memHandle{nullptr}; }; -struct Image -{ - VkImage image = VK_NULL_HANDLE; - MemHandle memHandle{nullptr}; +struct Image { + VkImage image = VK_NULL_HANDLE; + std::vector memHandles; }; -struct Texture -{ - VkImage image = VK_NULL_HANDLE; - MemHandle memHandle{nullptr}; +struct Texture { + VkImage image = VK_NULL_HANDLE; + std::vector memHandles; VkDescriptorImageInfo descriptor{}; }; -struct AccelNV -{ +struct AccelNV { VkAccelerationStructureNV accel = VK_NULL_HANDLE; - MemHandle memHandle{nullptr}; + MemHandle memHandle{nullptr}; }; -struct AccelKHR -{ +struct AccelKHR { VkAccelerationStructureKHR accel = VK_NULL_HANDLE; - nvvk::Buffer buffer; + nvvk::Buffer buffer; }; //-------------------------------------------------------------------------------------------------- @@ -165,26 +162,22 @@ struct AccelKHR // class StagingMemoryManager; - - -class ResourceAllocator -{ -public: +class ResourceAllocator { + public: ResourceAllocator(ResourceAllocator const&) = delete; ResourceAllocator& operator=(ResourceAllocator const&) = delete; ResourceAllocator() = default; - ResourceAllocator(VkDevice device, - VkPhysicalDevice physicalDevice, - MemAllocator* memAllocator, - VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); + ResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAllocator, + VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); // All staging buffers must be cleared before virtual ~ResourceAllocator(); 
//-------------------------------------------------------------------------------------------------- // Initialization of the allocator - void init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); + void init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, + VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); void deinit(); @@ -192,91 +185,86 @@ class ResourceAllocator //-------------------------------------------------------------------------------------------------- // Basic buffer creation - virtual nvvk::Buffer createBuffer(const VkBufferCreateInfo& info_, - const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + virtual nvvk::Buffer createBuffer( + const VkBufferCreateInfo& info_, + const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); //-------------------------------------------------------------------------------------------------- // Simple buffer creation // implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT - nvvk::Buffer createBuffer(VkDeviceSize size_ = 0, - VkBufferUsageFlags usage_ = VkBufferUsageFlags(), - const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + nvvk::Buffer createBuffer( + VkDeviceSize size_ = 0, VkBufferUsageFlags usage_ = VkBufferUsageFlags(), + const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); //-------------------------------------------------------------------------------------------------- // Simple buffer creation with data uploaded through staging manager // implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT - nvvk::Buffer createBuffer(const VkCommandBuffer& cmdBuf, - const VkDeviceSize& size_, - const void* data_, - VkBufferUsageFlags usage_, - VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + nvvk::Buffer createBuffer(const VkCommandBuffer& cmdBuf, const VkDeviceSize& size_, + 
const void* data_, VkBufferUsageFlags usage_, + VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); //-------------------------------------------------------------------------------------------------- // Simple buffer creation with data uploaded through staging manager // implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT template - nvvk::Buffer createBuffer(const VkCommandBuffer& cmdBuf, - const std::vector& data_, - VkBufferUsageFlags usage_, - VkMemoryPropertyFlags memProps_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) - { + nvvk::Buffer createBuffer(const VkCommandBuffer& cmdBuf, const std::vector& data_, + VkBufferUsageFlags usage_, + VkMemoryPropertyFlags memProps_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { return createBuffer(cmdBuf, sizeof(T) * data_.size(), data_.data(), usage_, memProps_); } - //-------------------------------------------------------------------------------------------------- // Basic image creation - nvvk::Image createImage(const VkImageCreateInfo& info_, const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - + nvvk::Image createImage(const VkImageCreateInfo& info_, const VkMemoryPropertyFlags memUsage_ = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); //-------------------------------------------------------------------------------------------------- // Create an image with data uploaded through staging manager - nvvk::Image createImage(const VkCommandBuffer& cmdBuf, - size_t size_, - const void* data_, + nvvk::Image createImage(const VkCommandBuffer& cmdBuf, size_t size_, const void* data_, const VkImageCreateInfo& info_, - const VkImageLayout& layout_ = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + const VkImageLayout& layout_ = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); //-------------------------------------------------------------------------------------------------- - // other variants could exist with a few defaults but we already have nvvk::makeImage2DViewCreateInfo() - // we could always override 
viewCreateInfo.image + // other variants could exist with a few defaults but we already have + // nvvk::makeImage2DViewCreateInfo() we could always override viewCreateInfo.image nvvk::Texture createTexture(const Image& image, const VkImageViewCreateInfo& imageViewCreateInfo); - nvvk::Texture createTexture(const Image& image, const VkImageViewCreateInfo& imageViewCreateInfo, const VkSamplerCreateInfo& samplerCreateInfo); + nvvk::Texture createTexture(const Image& image, const VkImageViewCreateInfo& imageViewCreateInfo, + const VkSamplerCreateInfo& samplerCreateInfo); //-------------------------------------------------------------------------------------------------- // shortcut that creates the image for the texture // - creates the image // - creates the texture part by associating image and sampler // - nvvk::Texture createTexture(const VkCommandBuffer& cmdBuf, - size_t size_, - const void* data_, - const VkImageCreateInfo& info_, - const VkSamplerCreateInfo& samplerCreateInfo, - const VkImageLayout& layout_ = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - bool isCube = false); + nvvk::Texture createTexture( + const VkCommandBuffer& cmdBuf, size_t size_, const void* data_, + const VkImageCreateInfo& info_, const VkSamplerCreateInfo& samplerCreateInfo, + const VkImageLayout& layout_ = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, bool isCube = false); #ifdef VULKAN_HPP - inline Texture createTexture(const vk::CommandBuffer& cmdBuf, - size_t size_, - const void* data_, - const vk::ImageCreateInfo& info_, - const vk::SamplerCreateInfo& samplerCreateInfo, - const vk::ImageLayout& layout_ = vk::ImageLayout::eShaderReadOnlyOptimal, - bool isCube = false) - { - return createTexture(static_cast(cmdBuf), size_, data_, static_cast(info_), - static_cast(samplerCreateInfo), static_cast(layout_), isCube); + inline Texture createTexture( + const vk::CommandBuffer& cmdBuf, size_t size_, const void* data_, + const vk::ImageCreateInfo& info_, const vk::SamplerCreateInfo& 
samplerCreateInfo, + const vk::ImageLayout& layout_ = vk::ImageLayout::eShaderReadOnlyOptimal, + bool isCube = false) { + return createTexture(static_cast(cmdBuf), + size_, + data_, + static_cast(info_), + static_cast(samplerCreateInfo), + static_cast(layout_), + isCube); } - nvvk::Texture createTexture(const nvvk::Image& image, const vk::ImageViewCreateInfo& imageViewCreateInfo) - { + nvvk::Texture createTexture(const nvvk::Image& image, + const vk::ImageViewCreateInfo& imageViewCreateInfo) { return createTexture(image, static_cast(imageViewCreateInfo)); } - nvvk::Texture createTexture(const nvvk::Image& image, + nvvk::Texture createTexture(const nvvk::Image& image, const vk::ImageViewCreateInfo& imageViewCreateInfo, - const vk::SamplerCreateInfo& samplerCreateInfo) - { - return createTexture(image, static_cast(imageViewCreateInfo), static_cast(samplerCreateInfo)); + const vk::SamplerCreateInfo& samplerCreateInfo) { + return createTexture(image, + static_cast(imageViewCreateInfo), + static_cast(samplerCreateInfo)); } #endif @@ -286,7 +274,6 @@ class ResourceAllocator // nvvk::AccelNV createAcceleration(VkAccelerationStructureCreateInfoNV& accel_); - //-------------------------------------------------------------------------------------------------- // Create the acceleration structure // @@ -296,7 +283,7 @@ class ResourceAllocator // Acquire a sampler with the provided information (see nvvk::SamplerPool for details). 
// Every acquire must have an appropriate release for appropriate internal reference counting VkSampler acquireSampler(const VkSamplerCreateInfo& info); - void releaseSampler(VkSampler sampler); + void releaseSampler(VkSampler sampler); //-------------------------------------------------------------------------------------------------- // implicit staging operations triggered by create are managed here @@ -304,10 +291,9 @@ class ResourceAllocator void finalizeAndReleaseStaging(VkFence fence = VK_NULL_HANDLE); void releaseStaging(); - StagingMemoryManager* getStaging(); + StagingMemoryManager* getStaging(); const StagingMemoryManager* getStaging() const; - //-------------------------------------------------------------------------------------------------- // Destroy // @@ -321,89 +307,90 @@ class ResourceAllocator // Other // void* map(const nvvk::Buffer& buffer); - void unmap(const nvvk::Buffer& buffer); - void* map(const nvvk::Image& image); - void unmap(const nvvk::Image& image); + void unmap(const nvvk::Buffer& buffer); + void* map(const nvvk::Image& image, uint32_t plane); + void unmap(const nvvk::Image& image, uint32_t plane); - VkDevice getDevice() const { return m_device; } + VkDevice getDevice() const { return m_device; } VkPhysicalDevice getPhysicalDevice() const { return m_physicalDevice; } - -protected: + protected: // If necessary, these can be overridden to specialize the allocation, for instance to // enforce allocation of exportable - virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo); - virtual void CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer); - virtual void CreateImageEx(const VkImageCreateInfo& info_, VkImage* image); + virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo, uint32_t plane = 0); + virtual void CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer); + virtual void CreateImageEx(const VkImageCreateInfo& info_, VkImage* image); 
//-------------------------------------------------------------------------------------------------- // Finding the memory type for memory allocation // uint32_t getMemoryType(uint32_t typeBits, const VkMemoryPropertyFlags& properties); - VkDevice m_device{VK_NULL_HANDLE}; - VkPhysicalDevice m_physicalDevice{VK_NULL_HANDLE}; - VkPhysicalDeviceMemoryProperties m_memoryProperties{}; - MemAllocator* m_memAlloc{nullptr}; + VkDevice m_device{VK_NULL_HANDLE}; + VkPhysicalDevice m_physicalDevice{VK_NULL_HANDLE}; + VkPhysicalDeviceMemoryProperties m_memoryProperties{}; + MemAllocator* m_memAlloc{nullptr}; std::unique_ptr m_staging; - SamplerPool m_samplerPool; - + SamplerPool m_samplerPool; #ifdef VULKAN_HPP -public: - nvvk::Buffer createBuffer(const vk::BufferCreateInfo& info_, const vk::MemoryPropertyFlags memUsage_ = vk::MemoryPropertyFlagBits::eDeviceLocal) - { - return createBuffer(static_cast(info_), static_cast(memUsage_)); + public: + nvvk::Buffer createBuffer( + const vk::BufferCreateInfo& info_, + const vk::MemoryPropertyFlags memUsage_ = vk::MemoryPropertyFlagBits::eDeviceLocal) { + return createBuffer(static_cast(info_), + static_cast(memUsage_)); } - nvvk::Buffer createBuffer(vk::DeviceSize size_, vk::BufferUsageFlags usage_, const vk::MemoryPropertyFlags memUsage_) - { - return createBuffer(static_cast(size_), static_cast(usage_), + nvvk::Buffer createBuffer(vk::DeviceSize size_, vk::BufferUsageFlags usage_, + const vk::MemoryPropertyFlags memUsage_) { + return createBuffer(static_cast(size_), + static_cast(usage_), static_cast(memUsage_)); } - nvvk::Buffer createBuffer(const vk::CommandBuffer& cmdBuf, - vk::DeviceSize size_, - const void* data_, - vk::BufferUsageFlags usage_, - vk::MemoryPropertyFlags memUsage_ = vk::MemoryPropertyFlagBits::eDeviceLocal) - { - return createBuffer(static_cast(cmdBuf), static_cast(size_), data_, - static_cast(usage_), static_cast(memUsage_)); + nvvk::Buffer createBuffer( + const vk::CommandBuffer& cmdBuf, vk::DeviceSize size_, 
const void* data_, + vk::BufferUsageFlags usage_, + vk::MemoryPropertyFlags memUsage_ = vk::MemoryPropertyFlagBits::eDeviceLocal) { + return createBuffer(static_cast(cmdBuf), + static_cast(size_), + data_, + static_cast(usage_), + static_cast(memUsage_)); } template - nvvk::Buffer createBuffer(const vk::CommandBuffer& cmdBuff, - const std::vector& data_, - const vk::BufferUsageFlags& usage_, - vk::MemoryPropertyFlags memUsage_ = vk::MemoryPropertyFlagBits::eDeviceLocal) - { + nvvk::Buffer createBuffer( + const vk::CommandBuffer& cmdBuff, const std::vector& data_, + const vk::BufferUsageFlags& usage_, + vk::MemoryPropertyFlags memUsage_ = vk::MemoryPropertyFlagBits::eDeviceLocal) { return createBuffer(cmdBuff, sizeof(T) * data_.size(), data_.data(), usage_, memUsage_); } - nvvk::Image createImage(const vk::ImageCreateInfo& info_, - const vk::MemoryPropertyFlags memUsage_ = vk::MemoryPropertyFlagBits::eDeviceLocal) - { - return createImage(static_cast(info_), static_cast(memUsage_)); + nvvk::Image createImage( + const vk::ImageCreateInfo& info_, + const vk::MemoryPropertyFlags memUsage_ = vk::MemoryPropertyFlagBits::eDeviceLocal) { + return createImage(static_cast(info_), + static_cast(memUsage_)); } - nvvk::Image createImage(const vk::CommandBuffer& cmdBuff, - size_t size_, - const void* data_, - const vk::ImageCreateInfo& info_, - const vk::ImageLayout& layout_ = vk::ImageLayout::eShaderReadOnlyOptimal) - { - return createImage(static_cast(cmdBuff), size_, data_, static_cast(info_), + nvvk::Image createImage( + const vk::CommandBuffer& cmdBuff, size_t size_, const void* data_, + const vk::ImageCreateInfo& info_, + const vk::ImageLayout& layout_ = vk::ImageLayout::eShaderReadOnlyOptimal) { + return createImage(static_cast(cmdBuff), + size_, + data_, + static_cast(info_), static_cast(layout_)); } - nvvk::AccelNV createAcceleration(vk::AccelerationStructureCreateInfoNV& accel_) - { + nvvk::AccelNV createAcceleration(vk::AccelerationStructureCreateInfoNV& accel_) { 
return createAcceleration(static_cast(accel_)); } - nvvk::AccelKHR createAcceleration(vk::AccelerationStructureCreateInfoKHR& accel_) - { + nvvk::AccelKHR createAcceleration(vk::AccelerationStructureCreateInfoKHR& accel_) { return createAcceleration(static_cast(accel_)); } @@ -418,25 +405,32 @@ class DeviceMemoryAllocator; /** \class nvvk::ResourceAllocatorDma - nvvk::ResourceAllocatorDMA is a convenience class owning a nvvk::DMAMemoryAllocator and nvvk::DeviceMemoryAllocator object + nvvk::ResourceAllocatorDMA is a convenience class owning a nvvk::DMAMemoryAllocator and + nvvk::DeviceMemoryAllocator object */ -class ResourceAllocatorDma : public ResourceAllocator -{ -public: +class ResourceAllocatorDma : public ResourceAllocator { + public: ResourceAllocatorDma() = default; - ResourceAllocatorDma(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE, VkDeviceSize memBlockSize = 0); + ResourceAllocatorDma(VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE, + VkDeviceSize memBlockSize = 0); virtual ~ResourceAllocatorDma(); - void init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE, VkDeviceSize memBlockSize = 0); - // Provided such that ResourceAllocatorDedicated, ResourceAllocatorDma and ResourceAllocatorVma all have the same interface - void init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE, VkDeviceSize memBlockSize = 0); + void init(VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE, + VkDeviceSize memBlockSize = 0); + // Provided such that ResourceAllocatorDedicated, ResourceAllocatorDma and ResourceAllocatorVma + // all have the same interface + void init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize 
stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE, + VkDeviceSize memBlockSize = 0); void deinit(); - nvvk::DeviceMemoryAllocator* getDMA() { return m_dma.get(); } - const nvvk::DeviceMemoryAllocator* getDMA() const { return m_dma.get(); } + nvvk::DeviceMemoryAllocator* getDMA() { return m_dma.get(); } + const nvvk::DeviceMemoryAllocator* getDMA() const { return m_dma.get(); } -protected: + protected: std::unique_ptr m_dma; }; @@ -446,22 +440,26 @@ class ResourceAllocatorDma : public ResourceAllocator /** \class nvvk::ResourceAllocatorDedicated - \brief nvvk::ResourceAllocatorDedicated is a convenience class automatically creating and owning a DedicatedMemoryAllocator object + \brief nvvk::ResourceAllocatorDedicated is a convenience class automatically creating and owning a + DedicatedMemoryAllocator object */ -class ResourceAllocatorDedicated : public ResourceAllocator -{ -public: +class ResourceAllocatorDedicated : public ResourceAllocator { + public: ResourceAllocatorDedicated() = default; - ResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); + ResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); virtual ~ResourceAllocatorDedicated(); - void init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); - // Provided such that ResourceAllocatorDedicated, ResourceAllocatorDma and ResourceAllocatorVma all have the same interface - void init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); + void init(VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); + // Provided such that ResourceAllocatorDedicated, ResourceAllocatorDma and ResourceAllocatorVma + // all have the same interface + void 
init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); void deinit(); -protected: + protected: std::unique_ptr m_memAlloc; }; @@ -472,22 +470,20 @@ class ResourceAllocatorDedicated : public ResourceAllocator /** #class nvvk::ExportResourceAllocator - ExportResourceAllocator specializes the object allocation process such that resulting memory allocations are - exportable and buffers and images can be bound to external memory. + ExportResourceAllocator specializes the object allocation process such that resulting memory + allocations are exportable and buffers and images can be bound to external memory. */ -class ExportResourceAllocator : public ResourceAllocator -{ -public: +class ExportResourceAllocator : public ResourceAllocator { + public: ExportResourceAllocator() = default; - ExportResourceAllocator(VkDevice device, - VkPhysicalDevice physicalDevice, - MemAllocator* memAlloc, - VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); - -protected: - virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo) override; - virtual void CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer) override; - virtual void CreateImageEx(const VkImageCreateInfo& info_, VkImage* image) override; + ExportResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, + VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); + + protected: + virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo, + uint32_t plane = 0) override; + virtual void CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer) override; + virtual void CreateImageEx(const VkImageCreateInfo& info_, VkImage* image) override; }; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -496,20 +492,22 @@ class ExportResourceAllocator : public ResourceAllocator /** \class 
nvvk::ExportResourceAllocatorDedicated - nvvk::ExportResourceAllocatorDedicated is a resource allocator that is using DedicatedMemoryAllocator to allocate memory - and at the same time it'll make all allocations exportable. + nvvk::ExportResourceAllocatorDedicated is a resource allocator that is using + DedicatedMemoryAllocator to allocate memory and at the same time it'll make all allocations + exportable. */ -class ExportResourceAllocatorDedicated : public ExportResourceAllocator -{ -public: +class ExportResourceAllocatorDedicated : public ExportResourceAllocator { + public: ExportResourceAllocatorDedicated() = default; - ExportResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); + ExportResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); virtual ~ExportResourceAllocatorDedicated() override; - void init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); + void init(VkDevice device, VkPhysicalDevice physicalDevice, + VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE); void deinit(); -protected: + protected: std::unique_ptr m_memAlloc; }; @@ -519,19 +517,22 @@ class ExportResourceAllocatorDedicated : public ExportResourceAllocator /** \class nvvk::ExplicitDeviceMaskResourceAllocator - nvvk::ExplicitDeviceMaskResourceAllocator is a resource allocator that will inject a specific devicemask into each - allocation, making the created allocations and objects available to only the devices in the mask. + nvvk::ExplicitDeviceMaskResourceAllocator is a resource allocator that will inject a specific + devicemask into each allocation, making the created allocations and objects available to only the + devices in the mask. 
*/ -class ExplicitDeviceMaskResourceAllocator : public ResourceAllocator -{ -public: +class ExplicitDeviceMaskResourceAllocator : public ResourceAllocator { + public: ExplicitDeviceMaskResourceAllocator() = default; - ExplicitDeviceMaskResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, uint32_t deviceMask); + ExplicitDeviceMaskResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, + MemAllocator* memAlloc, uint32_t deviceMask); - void init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, uint32_t deviceMask); + void init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, + uint32_t deviceMask); -protected: - virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo) override; + protected: + virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo, + uint32_t plane = 0) override; uint32_t m_deviceMask; }; diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/samplers_vk.cpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/samplers_vk.cpp index 4259f65..38b2458 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/samplers_vk.cpp +++ b/modules/holoviz/thirdparty/nvpro_core/nvvk/samplers_vk.cpp @@ -56,8 +56,8 @@ VkSampler SamplerPool::acquireSampler(const VkSamplerCreateInfo& createInfo) case VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO: state.reduction = *(const VkSamplerReductionModeCreateInfo*)ext; break; - case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO: - state.ycbr = *(const VkSamplerYcbcrConversionCreateInfo*)ext; + case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO: + state.ycbr = *(const VkSamplerYcbcrConversionInfo*)ext; break; default: assert(0 && "unsupported sampler create"); diff --git a/modules/holoviz/thirdparty/nvpro_core/nvvk/samplers_vk.hpp b/modules/holoviz/thirdparty/nvpro_core/nvvk/samplers_vk.hpp index f5b17ee..40b32cc 100644 --- a/modules/holoviz/thirdparty/nvpro_core/nvvk/samplers_vk.hpp +++ 
b/modules/holoviz/thirdparty/nvpro_core/nvvk/samplers_vk.hpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. * - * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ @@ -84,7 +84,7 @@ class SamplerPool { VkSamplerCreateInfo createInfo; VkSamplerReductionModeCreateInfo reduction; - VkSamplerYcbcrConversionCreateInfo ycbr; + VkSamplerYcbcrConversionInfo ycbr; SamplerState() { memset(this, 0, sizeof(SamplerState)); } diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 12404ff..5e8b56f 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -53,6 +53,12 @@ if(HOLOSCAN_BUILD_TESTS) WORKING_DIRECTORY ${CMAKE_PYTHON_WORKING_DIR} ) + add_test(NAME cli-tests + COMMAND ${PYTHON_EXECUTABLE} -m pytest ${CMAKE_PYTHON_WORKING_DIR}/tests/cli/unit -v --durations=0 + # either have to run from this working directory or set PYTHONPATH + WORKING_DIRECTORY ${CMAKE_PYTHON_WORKING_DIR} + ) + # rerun distributed tests but with the event-based scheduler # (omit serialization tests on this second run to keep overall time down) add_test(NAME python-api-system-distributed-ebs-tests @@ -60,6 +66,7 @@ if(HOLOSCAN_BUILD_TESTS) # either have to run from this working directory or set PYTHONPATH WORKING_DIRECTORY ${CMAKE_PYTHON_WORKING_DIR} ) + # Tracking any fatal error reported by the application tests set_tests_properties(python-api-system-tests PROPERTIES FAIL_REGULAR_EXPRESSION "Fatal Python error") @@ -67,6 +74,9 @@ if(HOLOSCAN_BUILD_TESTS) set_tests_properties(python-api-system-distributed-tests PROPERTIES FAIL_REGULAR_EXPRESSION "Fatal Python error") + set_tests_properties(cli-tests PROPERTIES + FAIL_REGULAR_EXPRESSION "Fatal Python error") + set_tests_properties(python-api-system-distributed-ebs-tests 
PROPERTIES FAIL_REGULAR_EXPRESSION "Fatal Python error") diff --git a/python/holoscan/__init__.py b/python/holoscan/__init__.py index e89865e..cf429da 100644 --- a/python/holoscan/__init__.py +++ b/python/holoscan/__init__.py @@ -15,15 +15,21 @@ limitations under the License. """ # noqa: E501 +import importlib.metadata + # We import cli, core and gxf to make sure they're available before other modules that rely on them from . import cli, core, gxf -try: - from ._version import __version__ -except ImportError: - __version__ = "unknown version" + +def _set_version(): + try: + return importlib.metadata.version("holoscan") + except ImportError: + return "unknown version" + __all__ = ["__version__", "as_tensor", "cli", "core", "gxf"] +__version__ = _set_version() def as_tensor(obj): diff --git a/python/holoscan/cli/common/artifact_sources.py b/python/holoscan/cli/common/artifact_sources.py index db386da..698aa05 100644 --- a/python/holoscan/cli/common/artifact_sources.py +++ b/python/holoscan/cli/common/artifact_sources.py @@ -1,57 +1,64 @@ """ - SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - SPDX-License-Identifier: Apache-2.0 +SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. """ # noqa: E501 import json import logging -from typing import Any, List, Optional +from typing import Any, Dict, List, Optional import requests +from packaging.version import Version -from .enum_types import Arch, PlatformConfiguration, SdkType +from holoscan import __version__ as holoscan_version_string + +from .enum_types import PlatformConfiguration, SdkType from .exceptions import InvalidSourceFileError, ManifestDownloadError class ArtifactSources: """Provides default artifact source URLs with the ability to override.""" - SectionDebianPackages = "debian-packages" + SectionWheelVersion = "wheel-version" + SectionDebianVersion = "debian-version" SectionBaseImages = "base-images" SectionBuildImages = "build-images" SectionHealthProbe = "health-probes" - ManifestFileUrl = ( - "https://edge.urm.nvidia.com/artifactory/sw-holoscan-cli-generic/artifacts.json" - ) EdgeROToken = 
"eyJ2ZXIiOiIyIiwidHlwIjoiSldUIiwiYWxnIjoiUlMyNTYiLCJraWQiOiJLcXV1ZVdTTlRjSkhqTFhGLTJCSnctX0lkRnY0eVhqREJyNEdWMU5Gc2NJIn0.eyJzdWIiOiJqZnJ0QDAxZHRqNnF0ZWNmcnB6MXJrNmg2cjAwd2FkXC91c2Vyc1wvc3ZjLWhvbG9zY2FuLWNsaS1wdWJsaWMtcm8iLCJzY3AiOiJtZW1iZXItb2YtZ3JvdXBzOnN2Yy1ob2xvc2Nhbi1jbGktcHVibGljLWdyb3VwIiwiYXVkIjoiamZydEAwMWR0ajZxdGVjZnJwejFyazZoNnIwMHdhZCIsImlzcyI6ImpmcnRAMDFkdGo2cXRlY2ZycHoxcms2aDZyMDB3YWRcL3VzZXJzXC9ycGFsYW5pc3dhbXkiLCJpYXQiOjE3MDY1NzA1NjUsImp0aSI6IjlmNmEyMmM1LTk5ZTItNGRlMi1hMDhiLTQxZjg2NzIyYmJjNyJ9.Y0gfyW2F0kxiKnMhGzNCyRRE2DNrDW6CUj5ozrQiIvAbSbhohskFcFmP836PU4p3ZQTzbYk9-bBwrqoPDUaZf8p9AW9GZ3mvlU2BxK0EQ-F4oKxA1_Z7agZ0KKcmcrfWnE4Ffy53qAD8PTk5vdcznpYOBpJtF4i16j2QcXvhVGGEqUyGa7_sONdK0sevb3ZztiEoupi4gD2wPTRn30rjpGIiFSDKiswAQwoyF_SqMCQWOBEeXMISp8hkEggUpvPrESv2lbpjgaKuEJ1CikbivYTJCcoqpgH7E72FXr1sB9jfwrFD8pkjtRpGGDxN43waXy4f3Ctr8_rpbmCvwSa9iw" # noqa: E501 + HoloscanVersion = None + ManifestFileUrl = None def __init__(self) -> None: + ArtifactSources.HoloscanVersion = ".".join( + str(i) for i in Version(holoscan_version_string).release[0:3] + ) + ArtifactSources.ManifestFileUrl = f"https://edge.urm.nvidia.com/artifactory/sw-holoscan-cli-generic/{ArtifactSources.HoloscanVersion}/artifacts.json" self._logger = logging.getLogger("common") - self._supported_holoscan_versions = ["2.0.0", "2.1.0", "2.2.0", "2.3.0"] + self._supported_holoscan_versions = ["2.4.0"] @property def holoscan_versions(self) -> List[str]: return self._supported_holoscan_versions - def base_images(self, version) -> List[Any]: + def base_image(self, version) -> str: return self._data[version][SdkType.Holoscan.value][ArtifactSources.SectionBaseImages] - def build_images(self, version) -> List[Any]: + def build_images(self, version) -> Dict[Any, str]: return self._data[version][SdkType.Holoscan.value][ArtifactSources.SectionBuildImages] - def health_probe(self, version) -> List[Any]: + def health_probe(self, version) -> Dict[Any, str]: return self._data[version][ArtifactSources.SectionHealthProbe] 
def load(self, uri: str): @@ -86,12 +93,12 @@ def validate(self, data: Any): assert SdkType.Holoscan.value in item holoscan = item[SdkType.Holoscan.value] - assert ArtifactSources.SectionDebianPackages in holoscan + assert ArtifactSources.SectionWheelVersion in holoscan + assert ArtifactSources.SectionDebianVersion in holoscan assert ArtifactSources.SectionBaseImages in holoscan assert ArtifactSources.SectionBuildImages in holoscan for config in PlatformConfiguration: - assert config.value in holoscan[ArtifactSources.SectionBaseImages] assert config.value in holoscan[ArtifactSources.SectionBuildImages] def download_manifest(self): @@ -114,31 +121,28 @@ def _download_manifest_internal(self, url, headers=None): self._data = manifest.json() self.validate(self._data) - def debian_packages( - self, version: str, architecture: Arch, platform_configuration: PlatformConfiguration - ) -> Optional[str]: - """Gets the URI of a Debian package based on the version, - the architecture and the platform configuration. + def debian_package_version(self, version: str) -> Optional[str]: + """Gets the version of the Debian package based on the version of Holoscan. + + Args: + version (str): version of Holoscan + + Returns: + Optional[str]: Debian package version + """ + return ( + self._data[version][SdkType.Holoscan.value][ArtifactSources.SectionDebianVersion] + if version in self._data + else None + ) + + def wheel_package_version(self, version: str) -> Optional[str]: + """Gets the version of the PyPI package based on the version of Holoscan. 
Args: - version (str): version of package - architecture (Arch): architecture of the package - platform_configuration (PlatformConfiguration): platform configuration of the package + version (str): version of Holoscan Returns: - Optional[str]: _description_ + Optional[str]: PyPI package version """ - debian_sources = self._data[version][SdkType.Holoscan.value][ - ArtifactSources.SectionDebianPackages - ] - - if architecture == Arch.amd64 and architecture.value in debian_sources: - return debian_sources[architecture.value] - elif ( - architecture == Arch.arm64 - and architecture.value in debian_sources - and platform_configuration.value in debian_sources[architecture.value] - ): - return debian_sources[architecture.value][platform_configuration.value] - - return None + return self._data[version][SdkType.Holoscan.value][ArtifactSources.SectionWheelVersion] diff --git a/python/holoscan/cli/common/constants.py b/python/holoscan/cli/common/constants.py index 05b2727..319e709 100644 --- a/python/holoscan/cli/common/constants.py +++ b/python/holoscan/cli/common/constants.py @@ -52,7 +52,6 @@ class Constants: PYTHON_EXECUTABLE = "python3" PYTHON_MAIN_FILE = "__main__.py" - PYPI_INSTALL_SOURCE = "pypi.org" TARBALL_FILE_EXTENSION = ".tar" DEBIAN_FILE_EXTENSION = ".deb" diff --git a/python/holoscan/cli/common/dockerutils.py b/python/holoscan/cli/common/dockerutils.py index d1036ab..36d471c 100644 --- a/python/holoscan/cli/common/dockerutils.py +++ b/python/holoscan/cli/common/dockerutils.py @@ -425,7 +425,10 @@ def _start_container( if not quiet: print(log[1].decode("utf-8")) elif log[0] == "stderr": - print(str(log[1].decode("utf-8"))) + try: + print(str(log[1].decode("utf-8"))) + except Exception: + print(str(log[1])) logger.info(f"Container '{container_name}'({container_id}) exited.") diff --git a/python/holoscan/cli/package-source.json b/python/holoscan/cli/package-source.json index 3a50848..503c885 100644 --- a/python/holoscan/cli/package-source.json +++ 
b/python/holoscan/cli/package-source.json @@ -1,135 +1,32 @@ { - "0.6.0": { - "holoscan": { - "debian-packages": { - "linux/amd64": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/holoscan_0.6.0.3-1_amd64.deb", - "linux/arm64": { - "igpu": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/arm64/holoscan_0.6.0.3-1_arm64.deb", - "dgpu": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa/holoscan_0.6.0.3-1_arm64.deb" - } - }, - "base-images": { - "igpu": { - "jetson-agx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-igpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-igpu" - }, - "dgpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu", - "clara-agx-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - }, - "igpu-assist": { - "clara-agx-devkit": "nvcr.io/nvidia/clara-holoscan/l4t-compute-assist:r34.1.0-r8.4.0-runtime", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/l4t-compute-assist:r35.3.0-r8.5.2-runtime" - }, - "cpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - } - }, - "build-images": { - "igpu": { - "jetson-agx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-igpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-igpu" - }, - "dgpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu", - "clara-agx-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - }, - "igpu-assist": { - "clara-agx-devkit": "nvcr.io/nvidia/clara-holoscan/l4t-compute-assist:r34.1.0-r8.4.0-runtime", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/l4t-compute-assist:r35.3.0-r8.5.2-runtime" - }, - "cpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - } - } + "2.4.0": { + 
"holoscan": { + "debian-version": "2.3.0.1-1", + "wheel-version": "2.3.0", + "base-images": { + "dgpu": "nvcr.io/nvidia/cuda:12.2.2-runtime-ubuntu22.04", + "igpu": "nvcr.io/nvidia/tensorrt:23.12-py3-igpu" + }, + "build-images": { + "igpu": { + "jetson-agx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-igpu", + "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-igpu", + "sbsa": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-igpu" }, - "health-probes": { - "linux/amd64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.19/grpc_health_probe-linux-amd64", - "linux/arm64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.19/grpc_health_probe-linux-arm64" - } - }, - "1.0.3": { - "holoscan": { - "debian-packages": { - "linux/amd64": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/holoscan_1.0.3.0-1_amd64.deb", - "linux/arm64": { - "igpu": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/holoscan_1.0.3.0-1_arm64.deb", - "dgpu": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/sbsa/holoscan_1.0.3.0-1_arm64.deb" - } - }, - "base-images": { - "igpu": { - "jetson-agx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu" - }, - "dgpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - }, - "cpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - } - }, - "build-images": { - "igpu": { - "jetson-agx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu" - }, - "dgpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - }, - 
"cpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - } - } + "dgpu": { + "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu", + "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu", + "sbsa": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu", + "clara-agx-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu" }, - "health-probes": { - "linux/amd64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.24/grpc_health_probe-linux-amd64", - "linux/arm64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.24/grpc_health_probe-linux-arm64" + "cpu": { + "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu" } + } }, - "2.0.0": { - "holoscan": { - "debian-packages": { - "linux/amd64": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/holoscan_1.0.3.0-1_amd64.deb", - "linux/arm64": { - "igpu": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/holoscan_1.0.3.0-1_arm64.deb", - "dgpu": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/sbsa/holoscan_1.0.3.0-1_arm64.deb" - } - }, - "base-images": { - "igpu": { - "jetson-agx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu" - }, - "dgpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - }, - "cpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - } - }, - "build-images": { - "igpu": { - "jetson-agx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu" - }, - "dgpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu", - "igx-orin-devkit": 
"nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - }, - "cpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v0.6.0-dgpu" - } - } - }, - "health-probes": { - "linux/amd64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.24/grpc_health_probe-linux-amd64", - "linux/arm64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.24/grpc_health_probe-linux-arm64" - } + "health-probes": { + "linux/amd64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.19/grpc_health_probe-linux-amd64", + "linux/arm64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.19/grpc_health_probe-linux-arm64" } + } } \ No newline at end of file diff --git a/python/holoscan/cli/packager/arguments.py b/python/holoscan/cli/packager/arguments.py index dc2efa9..c244adc 100644 --- a/python/holoscan/cli/packager/arguments.py +++ b/python/holoscan/cli/packager/arguments.py @@ -1,18 +1,18 @@ """ - SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - SPDX-License-Identifier: Apache-2.0 +SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. """ # noqa: E501 import logging @@ -69,13 +69,14 @@ def __init__(self, args: Namespace, temp_dir: str) -> None: self.build_parameters.uid = args.uid self.build_parameters.gid = args.gid self.build_parameters.build_cache = args.build_cache - self.build_parameters.config_file = args.config + self.build_parameters.app_config_file_path = args.config self.build_parameters.timeout = args.timeout if args.timeout else DefaultValues.TIMEOUT self.build_parameters.docs = args.docs if args.docs else None self.build_parameters.application = args.application self.build_parameters.no_cache = args.no_cache self.build_parameters.tarball_output = args.output self.build_parameters.cmake_args = args.cmake_args + self.build_parameters.includes = args.includes models = Models() platform = Platform(self._artifact_sources) diff --git a/python/holoscan/cli/packager/container_builder.py b/python/holoscan/cli/packager/container_builder.py index 366e3dd..c1f7c3a 100644 --- a/python/holoscan/cli/packager/container_builder.py +++ b/python/holoscan/cli/packager/container_builder.py @@ -17,6 +17,7 @@ import logging import os +import pprint import shutil from pathlib import Path from typing import Optional @@ -141,6 +142,7 @@ def _build_internal( "UID": self._build_parameters.uid, "GID": self._build_parameters.gid, "UNAME": self._build_parameters.username, + "GPU_TYPE": platform_parameters.platform_config.value, } self._logger.debug(f"Building Holoscan Application Package: tag={platform_parameters.tag}") @@ -180,6 +182,7 @@ def print_build_info(self, platform_parameters): SDK Version: 
{self._build_parameters.holoscan_sdk_version} SDK: {self._build_parameters.sdk.value} Tag: {platform_parameters.tag} + Included features/dependencies: {", ".join(self._build_parameters.includes) if self._build_parameters.includes else "N/A"} """ # noqa: E501 ) @@ -234,7 +237,7 @@ def _copy_application(self): shutil.copytree(self._build_parameters.application, target_application_path) target_config_file_path = Path(os.path.join(self._temp_dir, "app.config")) - shutil.copyfile(self._build_parameters.config_file, target_config_file_path) + shutil.copyfile(self._build_parameters.app_config_file_path, target_config_file_path) def _copy_model_files(self): """Copy models to temporary location""" @@ -267,12 +270,26 @@ def _get_template(self, platform_parameters: PlatformParameters): trim_blocks=True, lstrip_blocks=True, ) + self._logger.debug( + f""" +========== Begin Build Parameters ========== +{pprint.pformat(self._build_parameters.to_jinja)} +=========== End Build Parameters =========== +""" + ) + self._logger.debug( + f""" +========== Begin Platform Parameters ========== +{pprint.pformat(platform_parameters.to_jinja)} +=========== End Platform Parameters =========== +""" + ) jinja_template = jinja_env.get_template("Dockerfile.jinja2") return jinja_template.render( { - **self._build_parameters.to_jina, - **platform_parameters.to_jina, + **self._build_parameters.to_jinja, + **platform_parameters.to_jinja, } ) @@ -318,7 +335,7 @@ def _copy_pip_requirements(self): requirements_file.writelines("\n".join(self._build_parameters.pip_packages)) def _copy_sdk_file(self, sdk_file: Optional[Path]): - if sdk_file is not None and sdk_file != Constants.PYPI_INSTALL_SOURCE: + if sdk_file is not None and os.path.isfile(sdk_file): dest = os.path.join(self._temp_dir, sdk_file.name) if os.path.exists(dest): os.remove(dest) @@ -339,7 +356,9 @@ def __init__( def _copy_supporting_files(self, platform_parameters: PlatformParameters): """Copies the SDK file to the temporary directory""" - if 
platform_parameters.holoscan_sdk_file is not None: + if platform_parameters.holoscan_sdk_file is not None and os.path.isfile( + platform_parameters.holoscan_sdk_file + ): dest = os.path.join(self._temp_dir, platform_parameters.holoscan_sdk_file.name) if os.path.exists(dest): os.remove(dest) diff --git a/python/holoscan/cli/packager/package_command.py b/python/holoscan/cli/packager/package_command.py index 86b8390..28db356 100644 --- a/python/holoscan/cli/packager/package_command.py +++ b/python/holoscan/cli/packager/package_command.py @@ -1,18 +1,18 @@ """ - SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - SPDX-License-Identifier: Apache-2.0 +SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
""" # noqa: E501 import argparse @@ -94,6 +94,13 @@ def create_package_parser( type=str, help="container image name for building the C++ application.", ) + advanced_group.add_argument( + "--includes", + nargs="*", + default=[], + choices=["debug", "holoviz", "torch", "onnx"], + help="additional packages to include in the container.", + ) advanced_group.add_argument( "--build-cache", type=valid_dir_path, diff --git a/python/holoscan/cli/packager/parameters.py b/python/holoscan/cli/packager/parameters.py index 16023e4..c08c873 100644 --- a/python/holoscan/cli/packager/parameters.py +++ b/python/holoscan/cli/packager/parameters.py @@ -1,18 +1,18 @@ """ - SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - SPDX-License-Identifier: Apache-2.0 +SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. """ # noqa: E501 import logging @@ -35,6 +35,8 @@ def __init__( self._platform: Platform = platform self._platform_config: PlatformConfiguration = platform_config self._arch: Arch = SDK.PLATFORM_MAPPINGS[platform] + self._tag_prefix: Optional[str] + self._version: Optional[str] (self._tag_prefix, self._version) = parse_docker_image_name_and_tag(tag) @@ -55,6 +57,9 @@ def __init__( self._data["custom_base_image"] = False self._data["custom_holoscan_sdk"] = False self._data["custom_monai_deploy_sdk"] = False + self._data["target_arch"] = "aarch64" if self._arch == Arch.arm64 else "x86_64" + self._data["cuda_deb_arch"] = "sbsa" if self._arch == Arch.arm64 else "x86_64" + self._data["holoscan_deb_arch"] = "arm64" if self._arch == Arch.arm64 else "amd64" @property def tag(self) -> str: @@ -119,8 +124,8 @@ def holoscan_sdk_file(self, value: Path): self._data["holoscan_sdk_file"] = value if value is not None and hasattr(value, "name"): self._data["holoscan_sdk_filename"] = value.name - elif value == Constants.PYPI_INSTALL_SOURCE: - self._data["holoscan_sdk_filename"] = Constants.PYPI_INSTALL_SOURCE + elif value is not None: + self._data["holoscan_sdk_filename"] = value @property def monai_deploy_sdk_file(self) -> Optional[Path]: @@ -131,8 +136,6 @@ def monai_deploy_sdk_file(self, value: Path): self._data["monai_deploy_sdk_file"] = value if value is not None and hasattr(value, "name"): self._data["monai_deploy_sdk_filename"] = value.name - elif value == Constants.PYPI_INSTALL_SOURCE: - self._data["monai_deploy_sdk_filename"] = Constants.PYPI_INSTALL_SOURCE @property def version(self) -> str: @@ -143,7 +146,7 @@ def health_probe(self) -> Optional[Path]: return self._data.get("health_probe", None) @health_probe.setter - def health_probe(self, value: Path): + def health_probe(self, value: Optional[Path]): self._data["health_probe"] = value @property @@ -163,7 +166,7 @@ def 
platform_config(self) -> PlatformConfiguration: return self._platform_config @property - def to_jina(self) -> Dict[str, Any]: + def to_jinja(self) -> Dict[str, Any]: return self._data @property @@ -172,41 +175,53 @@ def same_arch_as_system(self) -> bool: platform.machine() == "x86_64" and self._arch == Arch.amd64 ) + @property + def cuda_deb_arch(self) -> str: + return self._data["cuda_deb_arch"] + + @property + def holoscan_deb_arch(self) -> str: + return self._data["holoscan_deb_arch"] + + @property + def target_arch(self) -> str: + return self._data["target_arch"] + class PlatformBuildResults: def __init__(self, parameters: PlatformParameters): self._parameters = parameters - self._docker_tag = None - self._tarball_filenaem = None + self._docker_tag: Optional[str] = None + self._tarball_filenaem: Optional[str] = None self._succeeded = False - self._error = None + self._error: Optional[str] = None @property def parameters(self) -> PlatformParameters: return self._parameters @property - def error(self) -> Exception: + def error(self) -> Optional[str]: return self._error @error.setter - def error(self, value: Exception): + def error(self, value: Optional[str]): self._error = value @property - def docker_tag(self) -> str: + def docker_tag(self) -> Optional[str]: return self._docker_tag @docker_tag.setter - def docker_tag(self, value: str): + def docker_tag(self, value: Optional[str]): self._docker_tag = value @property - def tarball_filenaem(self) -> str: + def tarball_filenaem(self) -> Optional[str]: return self._tarball_filenaem @tarball_filenaem.setter - def tarball_filenaem(self, value: str): + def tarball_filenaem(self, value: Optional[str]): self._tarball_filenaem = value @property @@ -232,8 +247,6 @@ def __init__(self): self._data["logs_dir"] = DefaultValues.HOLOSCAN_LOGS_DIR self._data["full_input_path"] = DefaultValues.WORK_DIR / DefaultValues.INPUT_DIR self._data["full_output_path"] = DefaultValues.WORK_DIR / DefaultValues.OUTPUT_DIR - 
self._data["cuda_deb_arch"] = "sbsa" if platform.processor() == "aarch64" else "x86_64" - self._data["holoscan_deb_arch"] = "arm64" if platform.processor() == "aarch64" else "amd64" self._data["input_dir"] = DefaultValues.INPUT_DIR self._data["models_dir"] = DefaultValues.MODELS_DIR self._data["output_dir"] = DefaultValues.OUTPUT_DIR @@ -247,10 +260,12 @@ def __init__(self): self._data["gid"] = os.getgid() self._data["tarball_output"] = None self._data["cmake_args"] = "" + self._data["includes"] = [] self._data["application_directory"] = None self._data["application_type"] = None self._data["application"] = None + self._data["app_config_file_path"] = None self._data["command"] = None self._data["no_cache"] = False self._data["pip_packages"] = None @@ -268,14 +283,6 @@ def build_cache(self) -> int: def build_cache(self, value: int): self._data["build_cache"] = value - @property - def cuda_deb_arch(self) -> str: - return self._data["cuda_deb_arch"] - - @property - def holoscan_deb_arch(self) -> str: - return self._data["holoscan_deb_arch"] - @property def full_input_path(self) -> str: return self._data["full_input_path"] @@ -386,6 +393,14 @@ def no_cache(self, value): def config_file_path(self): return self._data["config_file_path"] + @property + def app_config_file_path(self): + return self._data["app_config_file_path"] + + @app_config_file_path.setter + def app_config_file_path(self, value): + self._data["app_config_file_path"] = value + @property def app_dir(self): return self._data["app_dir"] @@ -496,7 +511,15 @@ def monai_deploy_app_sdk_version(self, value: str): self._data["monai_deploy_app_sdk_version"] = value @property - def to_jina(self) -> Dict[str, Any]: + def includes(self) -> str: + return self._data["includes"] + + @includes.setter + def includes(self, value: str): + self._data["includes"] = value + + @property + def to_jinja(self) -> Dict[str, Any]: return self._data def _detect_application_type(self) -> ApplicationType: diff --git 
a/python/holoscan/cli/packager/platforms.py b/python/holoscan/cli/packager/platforms.py index 2b5329b..c43c10f 100644 --- a/python/holoscan/cli/packager/platforms.py +++ b/python/holoscan/cli/packager/platforms.py @@ -1,18 +1,18 @@ """ - SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - SPDX-License-Identifier: Apache-2.0 +SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
""" # noqa: E501 import logging @@ -27,7 +27,6 @@ from ..common.exceptions import IncompatiblePlatformConfigurationError, InvalidSdkError from ..common.sdk_utils import detect_sdk, detect_sdk_version from .parameters import PlatformParameters -from .sdk_downloader import download_sdk_debian_file class Platform: @@ -171,9 +170,9 @@ def _find_base_image( try: return ( False, - self._artifact_sources.base_images(sdk_version)[ + self._artifact_sources.base_image(sdk_version)[ platform_parameters.platform_config.value - ][platform_parameters.platform.value], + ], ) except Exception as ex: raise IncompatiblePlatformConfigurationError( @@ -234,7 +233,10 @@ def _select_sdk_file( application_type: ApplicationType, holoscan_sdk_file: Optional[Path] = None, monai_deploy_sdk_file: Optional[Path] = None, - ) -> Tuple[Tuple[bool, Union[Path, str]], Tuple[Union[Path, str, None]]]: + ) -> Tuple[ + Tuple[bool, Union[Path, str]], + Tuple[Union[Optional[Path], Optional[str]], Union[Optional[Path], Optional[str]]], + ]: """ Detects the SDK distributable to use based on internal mapping or user input. @@ -315,7 +317,7 @@ def _get_holoscan_sdk( Returns: Tuple[bool, Union[Path, str]]: bool: True when user provides SDk file. Otherwise, False. - Union[Path, str]: Path to the SDK redistributable file. + Union[Path, str]: User provided SDK file path or package version. """ assert sdk is SdkType.Holoscan @@ -347,39 +349,34 @@ def _get_holoscan_sdk( ApplicationType.PythonModule, ApplicationType.PythonFile, ]: - return (False, Constants.PYPI_INSTALL_SOURCE) + wheel_package_version = self._artifact_sources.wheel_package_version(sdk_version) + + if wheel_package_version is None: + raise InvalidSdkError( + "Unable to locate matching Holoscan SDK PyPI package with " + f"version {sdk_version}." 
+ ) + + return (False, wheel_package_version) elif application_type in [ ApplicationType.CppCMake, ApplicationType.Binary, ]: - debian_package_source = self._artifact_sources.debian_packages( - sdk_version, - platform_parameters.platform_arch, - platform_parameters.platform_config, - ) - if debian_package_source is not None: - return ( - False, - download_sdk_debian_file( - debian_package_source, - sdk_version, - platform_parameters.platform_arch, - temp_dir, - self._logger, - self._artifact_sources, - ), - ) - else: + debian_package_version = self._artifact_sources.debian_package_version(sdk_version) + + if debian_package_version is None: raise InvalidSdkError( - f"No match Debian packages found for Holoscan SDK v{sdk_version}. Try " - "using `--sdk-file` instead." + "Unable to locate matching Holoscan SDK Debian package with " + f"version {sdk_version}." ) + return (False, debian_package_version) + raise InvalidSdkError(f"Unknown application type: {application_type.value}") def _get_monai_deploy_sdk( self, monai_deploy_app_sdk_version: Optional[str], sdk_file: Optional[Path] = None - ) -> Tuple[Union[Path, str]]: + ) -> Tuple[bool, Union[Optional[Path], Optional[str]]]: """ Validates MONAI Deploy SDK redistributable file if specified. Otherwise, Docker build stage will install the SDK from PyPI. diff --git a/python/holoscan/cli/packager/sdk_downloader.py b/python/holoscan/cli/packager/sdk_downloader.py deleted file mode 100644 index c5f663c..0000000 --- a/python/holoscan/cli/packager/sdk_downloader.py +++ /dev/null @@ -1,147 +0,0 @@ -""" - SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - SPDX-License-Identifier: Apache-2.0 - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" # noqa: E501 - -import logging -import os -import zipfile -from io import BytesIO -from pathlib import Path - -import requests - -from ..common.artifact_sources import ArtifactSources -from ..common.enum_types import Arch -from ..common.exceptions import ExternalAssetDownloadError, InvalidSdkError - - -def download_health_probe_file( - sdk_version: str, - arch: Arch, - temp_dir: str, - logger: logging.Logger, - artifact_sources: ArtifactSources, -) -> Path: - """Download gRPC health probe for the specified architecture. - - Args: - sdk_version (str): SDK version - arch (Arch): binary architecture to download - temp_dir (str): temporary location for storing downloaded file - logger (logging.Logger): logger - artifact_sources (ArtifactSources): artifact source - - Raises: - ExternalAssetDownloadError: when unable to download gRPC health probe - - Returns: - Path: path to the downloaded file - """ - target_dir = os.path.join(temp_dir, arch.name) - target_file = os.path.join(target_dir, "grpc_health_probe") - if os.path.exists(target_file) and os.path.isfile(target_file): - return Path(target_file) - - if not os.path.exists(target_dir): - os.mkdir(target_dir) - - try: - download_url = artifact_sources.health_probe(sdk_version)[arch.value] - logger.info(f"Downloading gRPC health probe from {download_url}...") - response = requests.get(download_url) - if not response.ok: - raise ExternalAssetDownloadError( - f"failed to download health probe utility from {download_url} with " - "HTTP status {response.status_code}." 
- ) - except Exception as e: - raise ExternalAssetDownloadError(f"error downloading health probe: {e}") from e - - try: - logger.info(f"Saving gRPC health probe to {target_file}...") - with open(target_file, "wb") as f: - f.write(response.content) - return Path(target_file) - except Exception as e: - raise ExternalAssetDownloadError(f"error saving health probe: {e}") from e - - -def download_sdk_debian_file( - debian_package_source: str, - sdk_version: str, - arch: Arch, - temp_dir: str, - logger: logging.Logger, - artifact_sources: ArtifactSources, -) -> Path: - """Download Holoscan SDK Debian package for the specified SDK version and architecture. - - Args: - debian_package_source(str): URI to download the Debian package from - sdk_version (str): SDK version - arch (Arch): Architecture - temp_dir (str): temporary location for storing downloaded file - logger (logging.Logger): logger - artifact_sources (ArtifactSources): artifact source - - Raises: - InvalidSdkError: when unable to download the Holoscan SDK Debian package - - Returns: - Path: path to the downloaded file - """ - try: - logger.info( - f"Downloading Holoscan Debian package ({arch.name}) from {debian_package_source}..." - ) - response = requests.get(debian_package_source) - if not response.ok: - raise InvalidSdkError( - f"failed to download SDK from {debian_package_source} with " - "HTTP status {response.status_code}." - ) - except Exception as ex: - raise InvalidSdkError( - f"failed to download SDK from {debian_package_source}: {response.reason}." 
- ) from ex - - if debian_package_source.endswith(".deb"): - filename = Path(debian_package_source).name - output_dir = os.path.join(temp_dir, f"{sdk_version}_{arch.name}") - os.mkdir(output_dir) - file_path = os.path.join(output_dir, filename) - with open(file_path, "wb") as f: - f.write(response.content) - logger.info(f"Debian package for {arch.name} downloaded to {file_path}") - return Path(file_path) - else: - try: - z = zipfile.ZipFile(BytesIO(response.content)) - unzip_dir = os.path.join(temp_dir, f"{sdk_version}_{arch.name}") - logger.info(f"Extracting Debian Package to {unzip_dir}...") - z.extractall(unzip_dir) - except Exception as ex: - raise InvalidSdkError( - f"failed to unzip SDK from {debian_package_source}: {response.reason}." - ) from ex - - for file in os.listdir(unzip_dir): - if file.endswith(".deb"): - file_path = os.path.join(unzip_dir, file) - logger.info(f"Debian package for {arch.name} downloaded to {file_path}") - return Path(file_path) - - raise InvalidSdkError(f"Debian package not found in {debian_package_source}") diff --git a/python/holoscan/cli/packager/templates/Dockerfile.jinja2 b/python/holoscan/cli/packager/templates/Dockerfile.jinja2 index 3117d55..39c6321 100644 --- a/python/holoscan/cli/packager/templates/Dockerfile.jinja2 +++ b/python/holoscan/cli/packager/templates/Dockerfile.jinja2 @@ -1,26 +1,29 @@ {# - SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - SPDX-License-Identifier: Apache-2.0 +SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. #} +ARG GPU_TYPE=dgpu + {% if application_type == 'CppCMake' %} +# Build C++ application in the builder stage FROM {{ build_image }} AS builder ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ - apt-get install -y jq + apt-get install -y --no-install-recommends jq WORKDIR /src COPY ./app/* /src @@ -38,11 +41,109 @@ RUN rm /install/CMakeCache.txt /install/Makefile /install/cmake_install.cmake && rm -r /install/CMakeFiles/ /install/.cmake/ {% endif %} -FROM {{ base_image }} + +FROM {{ base_image }} AS base + +RUN apt-get update \ + && apt-get install -y --no-install-recommends --no-install-suggests \ + curl \ + jq \ + && rm -rf /var/lib/apt/lists/* + +{% if 'torch' in includes %} +# Collect torch dependencies: libtorch, torchvision +FROM base AS torch-dependencies + +ARG GPU_TYPE +ARG TORCHVISION_VERSION=0.16.0_23.08 +ARG LIBTORCH_VERSION=2.1.0_23.08 + +# Install openmpi +RUN apt update && \ + apt-get install -y --no-install-recommends --no-install-suggests \ + bzip2 \ + libopenmpi3=4.1.2-* \ + && rm -rf /var/lib/apt/lists/* + +# Download libtorch +WORKDIR /opt/libtorch/ +RUN ARCH={{ target_arch }} && if [ "$ARCH" = "aarch64" ]; then ARCH="aarch64-${GPU_TYPE}"; fi && \ + curl -S -# -o libtorch.tgz -L \ + 
https://edge.urm.nvidia.com/artifactory/sw-holoscan-thirdparty-generic-local/libtorch/libtorch-${LIBTORCH_VERSION}-${ARCH}.tar.gz +RUN mkdir -p ${LIBTORCH_VERSION} && \ + tar -xf libtorch.tgz -C ${LIBTORCH_VERSION} --strip-components 1 && \ + rm -f libtorch.tgz && \ + find . -type f -name "*Config.cmake" -exec sed -i '/kineto/d' {} + + +# Download torchvision +WORKDIR /opt/torchvision/ +RUN ARCH={{ target_arch }} && if [ "$ARCH" = "aarch64" ]; then ARCH="aarch64-${GPU_TYPE}"; fi && \ + curl -S -# -o torchvision.tgz -L \ + https://edge.urm.nvidia.com/artifactory/sw-holoscan-thirdparty-generic-local/torchvision/torchvision-${TORCHVISION_VERSION}-${ARCH}.tar.gz +RUN mkdir -p ${TORCHVISION_VERSION} +RUN tar -xf torchvision.tgz -C ${TORCHVISION_VERSION} --strip-components 1 && \ + rm -f torchvision.tgz + +# Download HPCX for libucc.so.1 +WORKDIR /opt/hpcx +RUN curl -S -# -o hpcx.tbz -L \ + https://www.mellanox.com/downloads/hpc/hpc-x/v2.15/hpcx-v2.15-gcc-inbox-ubuntu22.04-cuda12-gdrcopy2-nccl2.17-{{target_arch}}.tbz && \ + tar -xvjf hpcx.tbz hpcx-v2.15-gcc-inbox-ubuntu22.04-cuda12-gdrcopy2-nccl2.17-{{target_arch}}/ucc/lib/libucc.so.1.0.0 && \ + rm -f hpcx.tbz && \ + find . 
-name libucc.so.1.0.0 -exec mv -f {} /opt/hpcx/libucc.so.1 \; +# End collect torch dependencies +{% endif %} + + +{% if 'onnx' in includes %} +# Collect onnx dependencies +FROM base AS onnx-dependencies +ARG GPU_TYPE +ARG ONNX_RUNTIME_VERSION=1.15.1_23.08 + +WORKDIR /opt/onnxruntime + +# Download onnx binaries +RUN curl -S -L -# -o ort.tgz \ + https://edge.urm.nvidia.com/artifactory/sw-holoscan-thirdparty-generic-local/onnxruntime/onnxruntime-${ONNX_RUNTIME_VERSION}-cuda-12.2-$(uname -m).tar.gz +RUN mkdir -p ${ONNX_RUNTIME_VERSION} +RUN tar -xf ort.tgz -C ${ONNX_RUNTIME_VERSION} --strip-components 2 && \ + rm -f ort.tgz5 +WORKDIR / +# End collect onnx dependencies +{% endif %} + +# FROM base AS mofed-installer +# ARG MOFED_VERSION=23.10-2.1.3.1 + +# # In a container, we only need to install the user space libraries, though the drivers are still +# # needed on the host. +# # Note: MOFED's installation is not easily portable, so we can't copy the output of this stage +# # to our final stage, but must inherit from it. For that reason, we keep track of the build/install +# # only dependencies in the `MOFED_DEPS` variable (parsing the output of `--check-deps-only`) to +# # remove them in that same layer, to ensure they are not propagated in the final image. +# WORKDIR /opt/nvidia/mofed +# ARG MOFED_INSTALL_FLAGS="--dpdk --with-mft --user-space-only --force --without-fw-update" +# RUN UBUNTU_VERSION=$(cat /etc/lsb-release | grep DISTRIB_RELEASE | cut -d= -f2) \ +# && OFED_PACKAGE="MLNX_OFED_LINUX-${MOFED_VERSION}-ubuntu${UBUNTU_VERSION}-$(uname -m)" \ +# && curl -S -# -o ${OFED_PACKAGE}.tgz -L \ +# https://www.mellanox.com/downloads/ofed/MLNX_OFED-${MOFED_VERSION}/${OFED_PACKAGE}.tgz \ +# && tar xf ${OFED_PACKAGE}.tgz \ +# && MOFED_INSTALLER=$(find . 
-name mlnxofedinstall -type f -executable -print) \ +# && MOFED_DEPS=$(${MOFED_INSTALLER} ${MOFED_INSTALL_FLAGS} --check-deps-only 2>/dev/null | tail -n1 | cut -d' ' -f3-) \ +# && apt-get update \ +# && apt-get install --no-install-recommends -y ${MOFED_DEPS} \ +# && ${MOFED_INSTALLER} ${MOFED_INSTALL_FLAGS} \ +# && rm -r * \ +# && apt-get remove -y ${MOFED_DEPS} && apt-get autoremove -y \ +# && rm -rf /var/lib/apt/lists/* + +FROM base AS release ENV DEBIAN_FRONTEND=noninteractive ENV TERM=xterm-256color +ARG GPU_TYPE ARG UNAME ARG UID ARG GID @@ -59,11 +160,11 @@ LABEL tag="{{ tag }}" LABEL org.opencontainers.image.title="{{ title }}" LABEL org.opencontainers.image.version="{{ version }}" LABEL org.nvidia.holoscan="{{ holoscan_sdk_version }}" + {% if sdk_type == 'monai-deploy' %} LABEL org.monai.deploy.app-sdk="{{ monai_deploy_app_sdk_version }}" {% endif %} - ENV HOLOSCAN_ENABLE_HEALTH_CHECK=true ENV HOLOSCAN_INPUT_PATH={{ full_input_path }} ENV HOLOSCAN_OUTPUT_PATH={{ full_output_path }} @@ -76,41 +177,207 @@ ENV HOLOSCAN_CONFIG_PATH={{ config_file_path }} ENV HOLOSCAN_APP_MANIFEST_PATH={{ app_json }} ENV HOLOSCAN_PKG_MANIFEST_PATH={{ pkg_json }} ENV HOLOSCAN_LOGS_PATH={{ logs_dir }} -ENV PATH=/root/.local/bin:/opt/nvidia/holoscan:$PATH -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/libtorch/1.13.1/lib/:/opt/nvidia/holoscan/lib +ENV HOLOSCAN_VERSION={{ holoscan_sdk_version }} +{% if 'debug' in includes %} +# Install debugging tools RUN apt-get update \ - && apt-get install -y curl jq \ + && apt-get install -y --no-install-recommends --no-install-suggests \ + build-essential \ + ccache \ + gdb \ + strace \ + sudo \ && rm -rf /var/lib/apt/lists/* +### End install debugging tools +{% endif %} + + +{% if 'holoviz' in includes %} +# Install Holoviz dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends --no-install-suggests --allow-downgrades --allow-change-held-packages -y \ + libvulkan1="1.3.204.1-*" \ + # X11 support \ + libgl1="1.4.0-*" \ + # 
Wayland support \ + libwayland-client0="1.20.0-*" \ + libwayland-egl1="1.20.0-*" \ + libxkbcommon0="1.4.0-*" \ + libdecor-0-plugin-1-cairo="0.1.0-*" \ + libegl1="1.4.0-*" \ + && rm -rf /var/lib/apt/lists/* +# End install Holoviz dependencies +{% endif %} + + +{% if 'torch' in includes %} +# Install torch dependencies +ENV PYTHON_VERSION=3.10.6-1~22.04 +ENV PYTHON_PIP_VERSION=22.0.2+dfsg-1ubuntu0.4 + +RUN apt update \ +&& apt-get install -y --no-install-recommends --no-install-suggests \ + python3-minimal=${PYTHON_VERSION} \ + libpython3-stdlib=${PYTHON_VERSION} \ + python3=${PYTHON_VERSION} \ + python3-venv=${PYTHON_VERSION} \ + python3-pip=${PYTHON_PIP_VERSION} \ + libjpeg-turbo8="2.1.2-*" \ + libnuma1="2.0.14-*" \ + libhwloc15="2.7.0-*" \ + libopenblas0="0.3.20+ds-*" \ + libevent-core-2.1-7 \ + libevent-pthreads-2.1-7 \ + cuda-cupti-12-2 \ + && rm -rf /var/lib/apt/lists/* + +# mkl - dependency for libtorch plugin on x86_64 (match pytorch container version) +RUN if [ "{{ cuda_deb_arch }}" = "x86_64" ]; then \ + python3 -m pip install --no-cache-dir \ + mkl==2021.1.1 \ + && \ + # Clean up duplicate libraries from mkl/tbb python wheel install which makes copies for symlinks. + # Only keep the *.so.X libs, remove the *.so and *.so.X.Y libs + # This can be removed once upgrading to an MKL pip wheel that fixes the symlinks + find /usr/local/lib -maxdepth 1 -type f -regex '.*\/lib\(tbb\|mkl\).*\.so\(\.[0-9]+\.[0-9]+\)?' 
-exec rm -v {} +; \ + fi + +# Copy Libtorch +ARG LIBTORCH_VERSION=2.1.0_23.08 +ENV LIBTORCH=/opt/libtorch/${LIBTORCH_VERSION}/lib +COPY --from=torch-dependencies ${LIBTORCH} ${LIBTORCH} + +# Copy TorchVision +ARG TORCHVISION_VERSION=0.16.0_23.08 +ENV TORCHVISION=/opt/torchvision/${TORCHVISION_VERSION}/lib +COPY --from=torch-dependencies ${TORCHVISION} ${TORCHVISION} + +ENV HPCX=/opt/hpcx/lib +COPY --from=torch-dependencies /opt/hpcx/libucc.so.1 ${LIBTORCH}/libucc.so.1 +COPY --from=torch-dependencies /usr/lib/{{target_arch}}-linux-gnu/libmpi.so.40 ${LIBTORCH}/libmpi.so.40 +COPY --from=torch-dependencies /usr/lib/{{target_arch}}-linux-gnu/libopen-rte.so.40 ${LIBTORCH}/libopen-rte.so.40 +COPY --from=torch-dependencies /usr/lib/{{target_arch}}-linux-gnu/libopen-pal.so.40 ${LIBTORCH}/libopen-pal.so.40 + +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${LIBTORCH}:${TORCHVISION}:${HPCX} +WORKDIR / +### End install torch dependencies +{% endif %} + + +{% if 'onnx' in includes %} +# Install onnx dependencies +ARG ONNX_RUNTIME_VERSION=1.15.1_23.08 +ENV ONNX_RUNTIME=/opt/onnxruntime/${ONNX_RUNTIME_VERSION}/lib +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${ONNX_RUNTIME} + +# Copy ONNX Runtime +COPY --from=onnx-dependencies ${ONNX_RUNTIME} ${ONNX_RUNTIME} + +RUN if [ "${GPU_TYPE}" = "dgpu" ]; then apt-get update \ + && apt-get install --no-install-recommends --no-install-suggests --allow-downgrades -y \ + libnvinfer-plugin8="8.6.*+cuda12.0" \ + libnvonnxparsers8="8.6.*+cuda12.0" \ + ; fi \ + && rm -rf /var/lib/apt/lists/* \ + && rm -f /usr/lib/*/libcudnn*train.so* +### End install onnx dependencies +{% endif %} + +{% if health_probe is defined %} +# Install gRPC health probe +RUN curl -L -o /bin/grpc_health_probe {{ health_probe | pprint }} \ + && chmod +x /bin/grpc_health_probe && ls -l /bin/grpc_health_probe + +HEALTHCHECK --interval=10s --timeout=1s \ + CMD /bin/grpc_health_probe -addr=:8777 || exit 1 + +# End install gRPC health probe +{% endif %} {% if application_type == 'PythonModule' 
or application_type == 'PythonFile' %} -ENV PYTHONPATH="{{ app_dir }}:$PYTHONPATH" +{% if not 'torch' in includes %} +# If torch is installed, we can skip installing Python +ENV PYTHON_VERSION=3.10.6-1~22.04 +ENV PYTHON_PIP_VERSION=22.0.2+dfsg-1ubuntu0.4 + +RUN apt update \ + && apt-get install -y --no-install-recommends --no-install-suggests \ + python3-minimal=${PYTHON_VERSION} \ + libpython3-stdlib=${PYTHON_VERSION} \ + python3=${PYTHON_VERSION} \ + python3-venv=${PYTHON_VERSION} \ + python3-pip=${PYTHON_PIP_VERSION} \ + && rm -rf /var/lib/apt/lists/* +{% endif %} + +{% if holoscan_deb_arch == "arm64" %} +# Requires python3-dev on aarch64 +RUN apt update \ + && apt-get install -y --no-install-recommends --no-install-suggests \ + gcc \ + python3-dev \ + && rm -rf /var/lib/apt/lists/* +{% endif %} + {% endif %} {% if application_type == 'CppCMake' or application_type == 'Binary' %} - {% if custom_base_image == True or custom_holoscan_sdk == True %} + +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/holoscan/lib + # Update NV GPG repo key # https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key/ -RUN if [ $(uname -m) = "aarch64" ]; then ARCH=sbsa; else ARCH=x86_64; fi \ - && curl -OL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/$ARCH/cuda-keyring_1.0-1_all.deb \ - && dpkg -i cuda-keyring_1.0-1_all.deb +RUN curl -OL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/{{ cuda_deb_arch }}/cuda-keyring_1.1-1_all.deb \ + && dpkg -i cuda-keyring_1.1-1_all.deb \ + && rm -f cuda-keyring_1.1-1_all.deb \ + && apt-get update + +RUN if [ "{{ holoscan_deb_arch }}" = "arm64" ]; then \ + GDR_REPO_ARCH=aarch64 ; \ + else \ + GDR_REPO_ARCH=x64 ; \ + fi \ + && curl -O https://developer.download.nvidia.com/compute/redist/gdrcopy/CUDA%2012.2/ubuntu22_04/${GDR_REPO_ARCH}/libgdrapi_2.4-1_{{ holoscan_deb_arch }}.Ubuntu22_04.deb \ + && dpkg -i libgdrapi_2.4-1_{{ holoscan_deb_arch }}.Ubuntu22_04.deb \ + && rm -f 
libgdrapi_2.4-1_{{ holoscan_deb_arch }}.Ubuntu22_04.deb + +{% if custom_holoscan_sdk == True %} # Use user-specified Holoscan SDK Debian Package COPY ./{{ holoscan_sdk_filename }} /tmp/{{ holoscan_sdk_filename }} -RUN apt-get update \ - && apt-get install -y /tmp/{{ holoscan_sdk_filename }} \ +RUN apt-get install -y --no-install-recommends --no-install-suggests \ + /tmp/{{ holoscan_sdk_filename }} \ + && rm -rf /var/lib/apt/lists/* + +{% else %} + +# Install Holoscan SDK from NVIDIA APT repository +# Holoscan: available versions (https://pypi.org/project/holoscan/#history) +RUN apt-get install -y --no-install-recommends --no-install-suggests \ + holoscan={{ holoscan_sdk_filename }} \ + # && apt-get remove -y g++ g++-11 gcc gcc-11 gcc-11-base build-essential \ + && rm -rf /var/lib/apt/lists/* + +{% endif %} + +{% endif %} + + +{% if holoscan_deb_arch == "arm64" %} +# Requires libnuma on aarch64 +RUN apt update \ + && apt-get install -y --no-install-recommends --no-install-suggests \ + libnuma1="2.0.14-*" \ && rm -rf /var/lib/apt/lists/* - {% else %} -# Use embedded Holoscan SDK -{# no op here #} - {% endif %} {% endif %} + RUN groupadd -f -g $GID $UNAME RUN useradd -rm -d /home/$UNAME -s /bin/bash -g $GID -G sudo -u $UID $UNAME -RUN chown -R holoscan {{ working_dir }} -RUN chown -R holoscan {{ full_input_path }} -RUN chown -R holoscan {{ full_output_path }} +RUN chown -R holoscan {{ working_dir }} && \ + chown -R holoscan {{ full_input_path }} && \ + chown -R holoscan {{ full_output_path }} # Set the working directory WORKDIR {{ working_dir }} @@ -119,20 +386,14 @@ WORKDIR {{ working_dir }} COPY ./tools {{ working_dir }}/tools RUN chmod +x {{ working_dir }}/tools - -# Copy gRPC health probe -{% if health_probe is defined %} - -RUN curl -L -o /bin/grpc_health_probe {{ health_probe | pprint }} \ - && chmod +x /bin/grpc_health_probe && ls -l /bin/grpc_health_probe - -HEALTHCHECK --interval=10s --timeout=1s \ - CMD /bin/grpc_health_probe -addr=:8777 || exit 1 -{% endif 
%} +# Set the working directory +WORKDIR {{ working_dir }} USER $UNAME -ENV PATH=/root/.local/bin:/home/holoscan/.local/bin:/opt/nvidia/holoscan:$PATH +ENV PATH=/home/${UNAME}/.local/bin:/opt/nvidia/holoscan/bin:$PATH +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:{{ app_dir }}:/home/${UNAME}/.local/lib/python3.10/site-packages/holoscan/lib +ENV PYTHONPATH="{{ app_dir }}:${PYTHONPATH}" {% if application_type == 'PythonModule' or application_type == 'PythonFile' %} COPY ./pip/requirements.txt /tmp/requirements.txt @@ -140,33 +401,31 @@ COPY ./pip/requirements.txt /tmp/requirements.txt RUN pip install --upgrade pip RUN pip install --no-cache-dir --user -r /tmp/requirements.txt -{# Use Holoscan SDK in the image unless specified by the user. -For MONAI Deploy, the APP SDK will install it unless user specifies the Holoscan SDK file. #} {% if sdk_type == 'holoscan' %} +# Install Holoscan SDK - {% if custom_holoscan_sdk == True %} +{% if custom_holoscan_sdk == True %} # Copy user-specified Holoscan SDK wheel file COPY ./{{ holoscan_sdk_filename }} /tmp/{{ holoscan_sdk_filename }} RUN pip install /tmp/{{ holoscan_sdk_filename }} - {% elif custom_base_image == True %} + +{% else %} # Install Holoscan SDK wheel from PyPI -RUN pip install holoscan=={{ holoscan_sdk_version }} - {% else %} -# Use embedded Holoscan SDK -{# no op here #} - {% endif %} +RUN pip install holoscan=={{holoscan_sdk_filename}} +{% endif %} {% else %} -# MONAI Deploy - {% if custom_monai_deploy_sdk %} +# Install MONAI Deploy App SDK +{% if custom_monai_deploy_sdk == True %} # Copy user-specified MONAI Deploy SDK file COPY ./{{ monai_deploy_sdk_filename }} /tmp/{{ monai_deploy_sdk_filename }} RUN pip install /tmp/{{ monai_deploy_sdk_filename }} - {% else %} +{% else %} + # Install MONAI Deploy from PyPI org RUN pip install monai-deploy-app-sdk=={{ monai_deploy_app_sdk_version }} - {% endif %} +{% endif %} {% endif %} {% endif %} @@ -174,7 +433,7 @@ RUN pip install monai-deploy-app-sdk=={{ 
monai_deploy_app_sdk_version }} COPY ./models {{ models_dir }} {% endif %} -{%- if docs is defined %} +{% if docs is defined %} COPY ./docs {{ docs_dir }} {% endif %} diff --git a/python/holoscan/core/CMakeLists.txt b/python/holoscan/core/CMakeLists.txt index ac87975..21c1982 100644 --- a/python/holoscan/core/CMakeLists.txt +++ b/python/holoscan/core/CMakeLists.txt @@ -30,6 +30,7 @@ holoscan_pybind11_module( io_context.cpp io_spec.cpp kwarg_handling.cpp + metadata.cpp network_context.cpp operator.cpp resource.cpp diff --git a/python/holoscan/core/__init__.py b/python/holoscan/core/__init__.py index 64eb18f..de96549 100644 --- a/python/holoscan/core/__init__.py +++ b/python/holoscan/core/__init__.py @@ -42,6 +42,8 @@ holoscan.core.InputContext holoscan.core.IOSpec holoscan.core.Message + holoscan.core.MetadataDictionary + holoscan.core.MetadataPolicy holoscan.core.NetworkContext holoscan.core.Operator holoscan.core.OperatorSpec @@ -83,7 +85,14 @@ Executor, ) from ._core import Fragment as _Fragment -from ._core import InputContext, IOSpec, Message, NetworkContext +from ._core import ( + InputContext, + IOSpec, + Message, + MetadataDictionary, + MetadataPolicy, + NetworkContext, +) from ._core import Operator as _Operator from ._core import OutputContext, ParameterFlag from ._core import PyComponentSpec as ComponentSpec @@ -127,6 +136,8 @@ "InputContext", "IOSpec", "Message", + "MetadataDictionary", + "MetadataPolicy", "NetworkContext", "Operator", "OperatorSpec", @@ -145,6 +156,15 @@ ] +# Define custom __repr__ method for MetadataDictionary +def metadata_repr(self): + items = {k: v for k, v in self.items()} + return f"{items}" + + +MetadataDictionary.__repr__ = metadata_repr + + class Application(_Application): def __init__(self, argv=None, *args, **kwargs): # If no arguments are provided, instead of letting the C++ API initialize the application diff --git a/python/holoscan/core/core.cpp b/python/holoscan/core/core.cpp index 7e569cf..c0a2887 100644 --- 
a/python/holoscan/core/core.cpp +++ b/python/holoscan/core/core.cpp @@ -45,6 +45,7 @@ PYBIND11_MODULE(_core, m) { init_io_context(m); init_execution_context(m); init_io_spec(m); + init_metadata(m); init_operator(m); init_scheduler(m); init_network_context(m); diff --git a/python/holoscan/core/core.hpp b/python/holoscan/core/core.hpp index a15d8e5..afc182e 100644 --- a/python/holoscan/core/core.hpp +++ b/python/holoscan/core/core.hpp @@ -36,6 +36,7 @@ namespace holoscan { void init_component(py::module_&); void init_condition(py::module_&); +void init_metadata(py::module_&); void init_network_context(py::module_&); void init_resource(py::module_&); void init_scheduler(py::module_&); diff --git a/python/holoscan/core/dl_converter.cpp b/python/holoscan/core/dl_converter.cpp index ccae260..6f34633 100644 --- a/python/holoscan/core/dl_converter.cpp +++ b/python/holoscan/core/dl_converter.cpp @@ -25,29 +25,10 @@ #include #include +#include "gxf/std/dlpack_utils.hpp" // nvidia::gxf::numpyTypestr #include "holoscan/core/common.hpp" #include "holoscan/core/domain/tensor.hpp" -#include "gxf/std/dlpack_utils.hpp" // nvidia::gxf::numpyTypestr - -namespace { - -// A macro like CHECK_CUDA_ERROR from gxf/cuda/cuda_common.h, but it uses Holoscan-style -// logging and throws an exception instead of returning an nvidia::gxf::Unexpected. -#define CHECK_CUDA_THROW_ERROR(cu_result, stmt, ...) \ - do { \ - cudaError_t err = (cu_result); \ - if (err != cudaSuccess) { \ - HOLOSCAN_LOG_ERROR("Runtime call {} in line {} of file {} failed with '{}' ({})", \ - #stmt, \ - __LINE__, \ - __FILE__, \ - cudaGetErrorString(err), \ - err); \ - throw std::runtime_error("Error occurred in CUDA runtime API call"); \ - } \ - } while (0) - -} // namespace +#include "holoscan/utils/cuda_macros.hpp" namespace holoscan { @@ -164,19 +145,15 @@ py::capsule py_dlpack(Tensor* tensor, py::object stream) { // Wait for the current stream to finish before the provided stream starts consuming the memory. 
if (stream_id >= 0 && curr_stream_ptr != stream_ptr) { cudaEvent_t curr_stream_event; - cudaError_t cuda_status; - - cuda_status = cudaEventCreateWithFlags(&curr_stream_event, cudaEventDisableTiming); - CHECK_CUDA_THROW_ERROR(cuda_status, "Failure during call to cudaEventCreateWithFlags"); - - cuda_status = cudaEventRecord(curr_stream_event, curr_stream_ptr); - CHECK_CUDA_THROW_ERROR(cuda_status, "Failure during call to cudaEventRecord"); - - cuda_status = cudaStreamWaitEvent(stream_ptr, curr_stream_event, 0); - CHECK_CUDA_THROW_ERROR(cuda_status, "Failure during call to cudaStreamWaitEvent"); - - cuda_status = cudaEventDestroy(curr_stream_event); - CHECK_CUDA_THROW_ERROR(cuda_status, "Failure during call to cudaEventDestroy"); + HOLOSCAN_CUDA_CALL_THROW_ERROR( + cudaEventCreateWithFlags(&curr_stream_event, cudaEventDisableTiming), + "Failure during call to cudaEventCreateWithFlags"); + HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaEventRecord(curr_stream_event, curr_stream_ptr), + "Failure during call to cudaEventRecord"); + HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaStreamWaitEvent(stream_ptr, curr_stream_event, 0), + "Failure during call to cudaStreamWaitEvent"); + HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaEventDestroy(curr_stream_event), + "Failure during call to cudaEventDestroy"); } DLManagedTensor* dl_managed_tensor = tensor->to_dlpack(); diff --git a/python/holoscan/core/fragment.cpp b/python/holoscan/core/fragment.cpp index e6fca4d..674168a 100644 --- a/python/holoscan/core/fragment.cpp +++ b/python/holoscan/core/fragment.cpp @@ -139,6 +139,10 @@ void init_fragment(py::module_& m) { "latency_threshold"_a = kDefaultLatencyThreshold, doc::Application::doc_track, py::return_value_policy::reference_internal) + .def_property("is_metadata_enabled", + py::overload_cast<>(&Fragment::is_metadata_enabled, py::const_), + py::overload_cast(&Fragment::is_metadata_enabled), + doc::Fragment::doc_is_metadata_enabled) .def("run", &Fragment::run, doc::Fragment::doc_run, diff --git 
a/python/holoscan/core/fragment_pydoc.hpp b/python/holoscan/core/fragment_pydoc.hpp index 2f1dccf..785bca7 100644 --- a/python/holoscan/core/fragment_pydoc.hpp +++ b/python/holoscan/core/fragment_pydoc.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -108,6 +108,10 @@ PYDOC(executor, R"doc( Get the executor associated with the fragment. )doc") +PYDOC(is_metadata_enabled, R"doc( +Property to get or set the boolean controlling whether operator metadata transmission is enabled. +)doc") + PYDOC(from_config, R"doc( Retrieve parameters from the associated configuration. diff --git a/python/holoscan/core/io_context.cpp b/python/holoscan/core/io_context.cpp index 77e4145..18061a0 100644 --- a/python/holoscan/core/io_context.cpp +++ b/python/holoscan/core/io_context.cpp @@ -168,7 +168,8 @@ py::object PyInputContext::py_receive(const std::string& name, const std::string int counter = 0; try { for (auto& any_item : any_result) { - if (any_item.type() == typeid(nullptr_t)) { + auto& item_type = any_item.type(); + if (item_type == typeid(kNoReceivedMessage) || item_type == typeid(nullptr_t)) { // add None to the tuple PyTuple_SET_ITEM(result_tuple.ptr(), counter++, py::none().release().ptr()); continue; diff --git a/python/holoscan/core/metadata.cpp b/python/holoscan/core/metadata.cpp new file mode 100644 index 0000000..f0c99ff --- /dev/null +++ b/python/holoscan/core/metadata.cpp @@ -0,0 +1,467 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "gil_guarded_pyobject.hpp" +#include "holoscan/core/arg.hpp" +#include "holoscan/core/metadata.hpp" +#include "kwarg_handling.hpp" +#include "metadata_pydoc.hpp" + +using pybind11::literals::operator""_a; + +namespace py = pybind11; + +// use a special class to differentiate a default value from Python's None +class _NoneValue {}; + +namespace holoscan { + +void set_scalar_metadata_via_dtype(const py::object& obj, const py::dtype& dt, + MetadataObject& out) { + std::string dtype_name = dt.attr("name").cast(); + if (dtype_name == "float32") { + out.set_value(obj.cast()); + } else if (dtype_name == "float64") { + out.set_value(obj.cast()); + } else if (dtype_name == "bool") { + out.set_value(obj.cast()); + } else if (dtype_name == "int8") { + out.set_value(obj.cast()); + } else if (dtype_name == "int16") { + out.set_value(obj.cast()); + } else if (dtype_name == "int32") { + out.set_value(obj.cast()); + } else if (dtype_name == "int64") { + out.set_value(obj.cast()); + } else if (dtype_name == "uint8") { + out.set_value(obj.cast()); + } else if (dtype_name == "uint16") { + out.set_value(obj.cast()); + } else if (dtype_name == "uint32") { + out.set_value(obj.cast()); + } else if (dtype_name == "uint64") { + out.set_value(obj.cast()); + } else if (dtype_name == "complex64") { + out.set_value(obj.cast>()); + } else if (dtype_name == "complex128") { + out.set_value(obj.cast>()); + } else { + throw std::runtime_error("unsupported dtype: "s + dtype_name); + 
} + return; +} + +template +void set_vector_metadata_via_numpy_array(const py::array& obj, MetadataObject& out) { + // not intended for images or other large tensors, just + // for short arrays containing parameter settings to operators + if (obj.attr("ndim").cast() == 1) { + std::vector v; + v.reserve(obj.attr("size").cast()); + for (auto item : obj) v.push_back(item.cast()); + out.set_value(v); + } else if (obj.attr("ndim").cast() == 2) { + std::vector> v; + std::vector shape = obj.attr("shape").cast>(); + v.reserve(static_cast(shape[0])); + for (auto item : obj) { + std::vector vv; + vv.reserve(static_cast(shape[1])); + for (auto inner_item : item) { vv.push_back(inner_item.cast()); } + v.push_back(vv); + } + out.set_value(v); + } else { + throw std::runtime_error("Only 1d and 2d NumPy arrays are supported."); + } +} + +template +void set_vector_metadata_via_py_sequence(const py::sequence& seq, MetadataObject& out) { + // not intended for images or other large tensors, just + // for short arrays containing parameter settings to operators + + auto first_item = seq[0]; + if (py::isinstance(first_item) && !py::isinstance(first_item)) { + // Handle list of list and other sequence of sequence types. 
+ std::vector> v; + v.reserve(static_cast(py::len(seq))); + for (auto item : seq) { + std::vector vv; + vv.reserve(static_cast(py::len(item))); + for (auto inner_item : item) { vv.push_back(inner_item.cast()); } + v.push_back(vv); + } + out.set_value(v); + } else { + // 1d vector to handle a sequence of elements + std::vector v; + size_t length = py::len(seq); + v.reserve(length); + for (auto item : seq) v.push_back(item.cast()); + out.set_value(v); + } +} + +void set_vector_metadata_via_iterable(const py::object& obj, MetadataObject& out) { + py::sequence seq; + if (py::isinstance(obj)) { + seq = obj; + } else { + // convert other iterables to a list first + seq = py::list(obj); + } + + if (py::len(seq) == 0) { throw std::runtime_error("sequences of length 0 are not supported."); } + + auto item0 = seq[0]; + if (py::isinstance(item0) && !py::isinstance(item0)) { + py::sequence inner_seq = item0; + if (py::len(inner_seq) == 0) { + throw std::runtime_error("sequences of length 0 are not supported."); + } + auto item = inner_seq[0]; + if (py::isinstance(item) && !py::isinstance(item)) { + throw std::runtime_error("Nested sequences of depth > 2 levels are not supported."); + } + if (py::isinstance(item)) { + set_vector_metadata_via_py_sequence(seq, out); + } else if (py::isinstance(item)) { + set_vector_metadata_via_py_sequence(seq, out); + } else if (py::isinstance(item)) { + set_vector_metadata_via_py_sequence(seq, out); + } else if (py::isinstance(item)) { + set_vector_metadata_via_py_sequence(seq, out); + } else { + throw std::runtime_error("Nested sequence of unsupported type."); + } + } else { + auto item = item0; + if (py::isinstance(item)) { + set_vector_metadata_via_py_sequence(seq, out); + } else if (py::isinstance(item)) { + set_vector_metadata_via_py_sequence(seq, out); + } else if (py::isinstance(item)) { + set_vector_metadata_via_py_sequence(seq, out); + } else if (py::isinstance(item)) { + set_vector_metadata_via_py_sequence(seq, out); + } + } + return; 
+} + +void set_vector_metadata_via_dtype(const py::object& obj, const py::dtype& dt, + MetadataObject& out) { + std::string dtype_name = dt.attr("name").cast(); + if (dtype_name == "float32") { + set_vector_metadata_via_numpy_array(obj, out); + } else if (dtype_name == "float64") { + set_vector_metadata_via_numpy_array(obj, out); + } else if (dtype_name == "bool") { + set_vector_metadata_via_numpy_array(obj, out); + } else if (dtype_name == "int8") { + set_vector_metadata_via_numpy_array(obj, out); + } else if (dtype_name == "int16") { + set_vector_metadata_via_numpy_array(obj, out); + } else if (dtype_name == "int32") { + set_vector_metadata_via_numpy_array(obj, out); + } else if (dtype_name == "int64") { + set_vector_metadata_via_numpy_array(obj, out); + } else if (dtype_name == "uint8") { + set_vector_metadata_via_numpy_array(obj, out); + } else if (dtype_name == "uint16") { + set_vector_metadata_via_numpy_array(obj, out); + } else if (dtype_name == "uint32") { + set_vector_metadata_via_numpy_array(obj, out); + } else if (dtype_name == "uint64") { + set_vector_metadata_via_numpy_array(obj, out); + } else if (dtype_name == "complex64") { + set_vector_metadata_via_numpy_array>(obj, out); + } else if (dtype_name == "complex128") { + set_vector_metadata_via_numpy_array>(obj, out); + } else { + throw std::runtime_error("unsupported dtype: "s + dtype_name); + } + return; +} + +void py_object_to_metadata_object(MetadataObject& meta_obj, const py::object& value, + const std::optional& dtype = std::nullopt, + bool cast_to_cpp = false) { + if (cast_to_cpp) { + if (py::isinstance(value)) { + meta_obj.set_value(value.cast()); + } else if (py::isinstance(value)) { + // handle numpy arrays + py::dtype array_dtype = value.cast().dtype(); + set_vector_metadata_via_dtype(value, array_dtype, meta_obj); + } else if (py::isinstance(value) && !py::isinstance(value)) { + // does not handle every possible type of iterable (e.g. 
dict) + // will work for any that can be cast to py::list + set_vector_metadata_via_iterable(value, meta_obj); + } else if (py::isinstance(value)) { + meta_obj.set_value(value.cast()); + } else if (py::isinstance(value)) { + if (dtype.has_value()) { + set_scalar_metadata_via_dtype(value, dtype.value(), meta_obj); + } else { + meta_obj.set_value(value.cast()); + } + } else if (py::isinstance(value)) { + if (dtype.has_value()) { + set_scalar_metadata_via_dtype(value, dtype.value(), meta_obj); + } else { + meta_obj.set_value(value.cast()); + } + } else { + throw std::runtime_error( + "Cast to C++ is unsupported for this type. Set cast_to_cpp to false to " + "send as a Python object"); + } + } else { + auto data_ptr = std::make_shared(value); + meta_obj.set_value(data_ptr); + } + return; +} + +py::object metadata_obj_to_pyobject(MetadataObject& meta_obj) { + std::any value = meta_obj.value(); + const auto& id = value.type(); + // Return a Python objects as-is. + if (id == typeid(std::shared_ptr)) { + return std::any_cast>(value)->obj(); + } + // For C++ types, support casting T, vector, and vector> types + // where T is either std::string, bool or various integer or floating point types. 
+ if (id == typeid(std::string)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(float)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(double)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(bool)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(int64_t)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(uint64_t)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(int32_t)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(uint32_t)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(int16_t)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(uint16_t)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(int8_t)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(uint8_t)) { + return py::cast(std::any_cast(value)); + } else if (id == typeid(std::complex)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::complex)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + 
return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector)) { + return py::cast(std::any_cast>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>)) { + return py::cast(std::any_cast>>(value)); + } else if (id == typeid(std::vector>>)) { + return py::cast(std::any_cast>>>(value)); + } else if (id == typeid(std::vector>>)) { + return py::cast(std::any_cast>>>(value)); + } else { + return py::none(); + } +} + +void init_metadata(py::module_& m) { + py::class_<_NoneValue>(m, "_NoneValue").def(py::init<>()); + + py::enum_(m, "MetadataPolicy", doc::MetadataPolicy::doc_MetadataPolicy) + .value("REJECT", MetadataPolicy::kReject) + .value("UPDATE", MetadataPolicy::kUpdate) + .value("RAISE", MetadataPolicy::kRaise); + + // MetadataDictionary provides a Python dict-like interface to the C++ MetadataDictionary. 
+ py::class_>( + m, "MetadataDictionary", doc::MetadataDictionary::doc_MetadataDictionary) + .def(py::init<>(), doc::MetadataDictionary::doc_MetadataDictionary) + .def("has_key", &MetadataDictionary::has_key, "key"_a, doc::MetadataDictionary::doc_has_key) + .def("__contains__", &MetadataDictionary::has_key) + .def("keys", &MetadataDictionary::keys, doc::MetadataDictionary::doc_keys) + .def( + "get", + [](MetadataDictionary& meta_dict, + const std::string& key, + const py::object& default_value = py::none()) -> py::object { + if (!meta_dict.has_key(key)) { return default_value; } + auto meta_obj = meta_dict.get(key); + return metadata_obj_to_pyobject(*meta_obj); + }, + "key"_a, + "value"_a = py::none(), + doc::MetadataDictionary::doc_get) + .def( + "__getitem__", + [](MetadataDictionary& meta_dict, const std::string& key) -> py::object { + if (!meta_dict.has_key(key)) { throw py::key_error(key); } + auto meta_obj = meta_dict.get(key); + return metadata_obj_to_pyobject(*meta_obj); + }, + "key"_a) + .def( + "items", + [](MetadataDictionary& meta_dict) -> std::vector> { + std::vector> items; + items.reserve(meta_dict.size()); + for (auto& [key, value] : meta_dict) { + items.push_back({key, metadata_obj_to_pyobject(*value)}); + } + return items; + }, + doc::MetadataDictionary::doc_items) + .def( + "type_dict", + [](MetadataDictionary& meta_dict) -> py::dict { + py::dict type_dict; + for (auto& [key, v] : meta_dict) { + type_dict[py::str(key)] = py::str(v->value().type().name()); + } + return type_dict; + }, + doc::MetadataDictionary::doc_type_dict) + .def( + "pop", + [](MetadataDictionary& meta_dict, + const std::string& key, + const py::object& default_value = py::none()) -> py::object { + if (!meta_dict.has_key(key)) { + if (py::isinstance<_NoneValue>(default_value)) { + throw py::key_error(key); + } else { + return default_value; + } + } + auto meta_obj = meta_dict.get(key); + auto result = metadata_obj_to_pyobject(*meta_obj); + meta_dict.erase(key); + return result; 
+ }, + "key"_a, + "default"_a = _NoneValue(), + doc::MetadataDictionary::doc_pop) + .def( + "set", + [](MetadataDictionary& meta_dict, + const std::string& key, + py::object& value, + const std::optional& dtype = std::nullopt, + bool cast_to_cpp = false) { + if (!cast_to_cpp) { + auto data_ptr = std::make_shared(value); + meta_dict.set>(key, data_ptr); + } else { + auto meta_obj = std::make_shared(); + py_object_to_metadata_object(*meta_obj, value, dtype, cast_to_cpp); + meta_dict.set(key, meta_obj); + } + }, + "key"_a, + "value"_a, + "dtype"_a = py::none(), + "cast_to_cpp"_a = false, + doc::MetadataDictionary::doc_set) + .def("__setitem__", + [](MetadataDictionary& meta_dict, const std::string& key, py::object& value) { + auto data_ptr = std::make_shared(value); + meta_dict.set>(key, data_ptr); + }) + .def_property("policy", + py::overload_cast<>(&MetadataDictionary::policy, py::const_), + py::overload_cast(&MetadataDictionary::policy), + doc::MetadataDictionary::doc_policy) + .def("size", &MetadataDictionary::size, doc::MetadataDictionary::doc_size) + .def("__len__", &MetadataDictionary::size) + .def("erase", &MetadataDictionary::erase, "key"_a, doc::MetadataDictionary::doc_erase) + .def("__delitem__", &MetadataDictionary::erase, "key"_a) + .def("clear", &MetadataDictionary::clear, doc::MetadataDictionary::doc_clear) + .def("merge", &MetadataDictionary::merge, "other"_a, doc::MetadataDictionary::doc_merge) + .def("insert", &MetadataDictionary::insert, "other"_a, doc::MetadataDictionary::doc_insert) + .def("swap", &MetadataDictionary::swap, "other"_a, doc::MetadataDictionary::doc_swap) + .def("update", &MetadataDictionary::update, "other"_a, doc::MetadataDictionary::doc_update); +} + +} // namespace holoscan diff --git a/python/holoscan/core/metadata_pydoc.hpp b/python/holoscan/core/metadata_pydoc.hpp new file mode 100644 index 0000000..f80dcb7 --- /dev/null +++ b/python/holoscan/core/metadata_pydoc.hpp @@ -0,0 +1,229 @@ +/* + * SPDX-FileCopyrightText: Copyright 
(c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef HOLOSCAN_CORE_METADATA_PYDOC_HPP +#define HOLOSCAN_CORE_METADATA_PYDOC_HPP + +#include + +#include "../macros.hpp" + +namespace holoscan::doc { + +namespace MetadataPolicy { + +PYDOC(MetadataPolicy, R"doc( +Enum to define the policy for handling behavior of MetadataDictionary::set when a key already +exists. + +MetadataPolicy.REJECT - Reject the new value if the key already exists +MetadataPolicy.UPDATE - Update the new value if the key already exists +MetadataPolicy.RAISE - Raise an exception if the key already exists + +)doc") +} // namespace MetadataPolicy + +namespace MetadataDictionary { + +PYDOC(MetadataDictionary, R"doc( +Class representing a holoscan metadata dictionary. +)doc") + +PYDOC(has_key, R"doc( +Determine if an item with the given key exists in the dictionary. + +Parameters +---------- +key : str + The key to check for in the dictionary. + +Returns +------- +bool + True if the key exists in the dictionary, False otherwise. +)doc") + +PYDOC(keys, R"doc( +Get a list of the metadata keys in the dictionary. + +Returns +------- +List[str] + A list of the keys in the dictionary. +)doc") + +PYDOC(pop, R"doc( +Pop the specified item from the dictionary. + +Parameters +---------- +key : str + The key to pop from the dictionary. 
+default : object, optional + The value to return if the key is not found in the dictionary. If not provided, a KeyError will + be raised if the specified key does not exist. + +Returns +------- +value : object + The value stored in the dictionary with the given key. +)doc") + +PYDOC(items, R"doc( +Returns a list of (key, value) tuples for all items in the dictionary. + +Returns +------- +items : List[Tuple[str, object]] + A list of (key, value) tuples for all items in the dictionary. +)doc") + +PYDOC(type_dict, R"doc( +Returns a dictionary of C++ `std::type_info` names corresponding to the values. + +Returns +------- +type_dict : Dict[str, str] + The keys will match those of this MetadataDictionary while the values are the C++ type names + corresponding to the values. These type names are mainly relevant for the items stored as C++ + types. All items with values that are Python objects will have the name + `typeid(GILGuardedPythonObject).name()`. +)doc") + +PYDOC(get, R"doc( +Get the item with the given key from the dictionary. + +Returns +------- +object + The value stored in the dictionary with the given key. +)doc") + +PYDOC(set, R"doc( +Store the given value in the dictionary with the given key. + +Parameters +---------- +key : str + The key to store the value under. +value : object + The value to set. By default the Python object is directly stored. If the metadata will be sent + to a downstream operator that wraps a C++ operator, it may be desirable to instead cast the + data to a C++ type. This can be done by setting `cast_to_cpp` to True. +dtype : numpy.dtype, optional + When `cast_to_cpp` is True, the `dtype` argument can be used to indicate what numeric type + the values should be cast to. If not provided, the default C++ type will be `double` for a + Python `float` and `int64_t` for a Python `int`. +cast_to_cpp : bool, optional + If True, the Python object will be converted to a corresponding C++ type, if possible.
If + False, the Python object will be stored directly. The types that can be cast are `str`, `bool` + and various floating point and integer types. Iterables or sequences with uniform element type + will become a std::vector of the contained type. +)doc") + +PYDOC(size, R"doc( +Get the size of the metadata dictionary. + +Returns +------- +size : int + The number of items in the dictionary. +)doc") + +PYDOC(erase, R"doc( +Remove the item with the given key from the dictionary. + +Parameters +---------- +key : str + The key to check for in the dictionary. + +Returns +------- +bool + True if the key was found and removed, False otherwise. +)doc") + +PYDOC(clear, R"doc( +Clear all items from the dictionary +)doc") + +PYDOC(insert, R"doc( +Insert items from another MetadataDictionary into this dictionary. + +Parameters +---------- +other : MetadataDictionary + Insert items from other into this dictionary. If a key already exists in this dictionary, the + value will not be updated. +)doc") + +PYDOC(merge, R"doc( +Merge items from another MetadataDictionary into this dictionary. + +Parameters +---------- +other : MetadataDictionary + Merge items from `other` into this dictionary. If a key already exists in this dictionary, the + value will not be updated. Any items inserted into this dictionary will be removed from `other`. +)doc") + +PYDOC(swap, R"doc( +Swap the contents of this MetadataDictionary with another one. + +Parameters +---------- +other : MetadataDictionary + The metadata dictionary to swap contents with. +)doc") + +PYDOC(update, R"doc( +Update items in this dictionary with items from another MetadataDictionary. + +Parameters +---------- +other : MetadataDictionary + Insert items from other into this dictionary. If a key already exists in this dictionary, the + value will be updated in accordance with this dictionary's metadata policy. +)doc") + +PYDOC(policy, R"doc( +Metadata policy property that governs the behavior of the `set` and `update` methods. 
+ +It can be set to one of the following modes: + +MetadataPolicy.REJECT - Reject the new value if the key already exists +MetadataPolicy.UPDATE - Update the new value if the key already exists +MetadataPolicy.RAISE - Raise an exception if the key already exists + +)doc") + +PYDOC(MetadataPolicy, R"doc( +Enum to define the policy for handling behavior of MetadataDictionary::set when a key already +exists. + +MetadataPolicy.REJECT - Reject the new value if the key already exists +MetadataPolicy.UPDATE - Update the new value if the key already exists +MetadataPolicy.RAISE - Raise an exception if the key already exists + +)doc") + +} // namespace MetadataDictionary + +} // namespace holoscan::doc + +#endif /* HOLOSCAN_CORE_METADATA_PYDOC_HPP */ diff --git a/python/holoscan/core/operator.cpp b/python/holoscan/core/operator.cpp index 37f2ed3..1830ce2 100644 --- a/python/holoscan/core/operator.cpp +++ b/python/holoscan/core/operator.cpp @@ -33,6 +33,7 @@ #include "holoscan/core/expected.hpp" #include "holoscan/core/fragment.hpp" #include "holoscan/core/gxf/entity.hpp" +#include "holoscan/core/metadata.hpp" #include "holoscan/core/operator.hpp" #include "holoscan/core/operator_spec.hpp" #include "holoscan/core/resource.hpp" @@ -145,6 +146,15 @@ void init_operator(py::module_& m) { .def_property_readonly("resources", &Operator::resources, doc::Operator::doc_resources) .def_property_readonly( "operator_type", &Operator::operator_type, doc::Operator::doc_operator_type) + .def_property_readonly( + "metadata", py::overload_cast<>(&Operator::metadata), doc::Operator::doc_metadata) + .def_property_readonly("is_metadata_enabled", + py::overload_cast<>(&Operator::is_metadata_enabled), + doc::Operator::doc_is_metadata_enabled) + .def_property("metadata_policy", + py::overload_cast<>(&Operator::metadata_policy, py::const_), + py::overload_cast(&Operator::metadata_policy), + doc::Operator::doc_metadata_policy) .def( "resource", [](Operator& op, const py::str& name) -> std::optional { 
@@ -573,13 +583,21 @@ void PyOperator::initialize() { // Get the initialize method of the Python Operator class and call it py::gil_scoped_acquire scope_guard; + // Call the parent class's `initialize()` method to set up the operator arguments so that + // parameters can be accessed in the `initialize()` method of the Python Operator class. + // + // In C++, this call is made in the `initialize()` method (usually at the end of the method) + // of the inheriting Operator class, using `Operator::initialize()` call. + // In Python, the user doesn't have to call the parent class's `initialize()` method explicitly. + // If there is a need to initialize something (such as adding arguments), it can be done + // directly in the `__init__` method of the Python class inheriting from the Operator class before + // calling the parent class's `__init__` method using + // `super().__init__(fragment, *args, **kwargs)`. + Operator::initialize(); + set_py_tracing(); py_initialize_.operator()(); - - // Call the parent class's initialize method after invoking the Python Operator's initialize - // method. - Operator::initialize(); } void PyOperator::start() { diff --git a/python/holoscan/core/operator_pydoc.hpp b/python/holoscan/core/operator_pydoc.hpp index 50fd27a..b671e5d 100644 --- a/python/holoscan/core/operator_pydoc.hpp +++ b/python/holoscan/core/operator_pydoc.hpp @@ -205,6 +205,18 @@ PYDOC(fragment, R"doc( The fragment (``holoscan.core.Fragment``) that the operator belongs to. )doc") +PYDOC(metadata, R"doc( +The metadata dictionary (``holoscan.core.MetadataDictionary``) associated with the operator. +)doc") + +PYDOC(is_metadata_enabled, R"doc( +Boolean indicating whether the fragment this operator belongs to has metadata transmission enabled. +)doc") + +PYDOC(metadata_policy, R"doc( +The metadata dictionary (``holoscan.core.MetadataPolicy``) associated with the operator. 
+)doc") + PYDOC(spec, R"doc( The operator spec (``holoscan.core.OperatorSpec``) associated with the operator. )doc") diff --git a/python/holoscan/core/tensor.cpp b/python/holoscan/core/tensor.cpp index 9e7e403..8548352 100644 --- a/python/holoscan/core/tensor.cpp +++ b/python/holoscan/core/tensor.cpp @@ -30,6 +30,7 @@ #include "dl_converter.hpp" #include "gxf/std/dlpack_utils.hpp" // DLDeviceFromPointer, DLDataTypeFromTypeString #include "holoscan/core/domain/tensor.hpp" +#include "holoscan/utils/cuda_macros.hpp" #include "kwarg_handling.hpp" #include "tensor.hpp" #include "tensor_pydoc.hpp" @@ -41,22 +42,6 @@ namespace py = pybind11; namespace { -// A macro like CHECK_CUDA_ERROR from gxf/cuda/cuda_common.h, but it uses Holoscan-style -// logging and throws an exception instead of returning an nvidia::gxf::Unexpected. -#define CHECK_CUDA_THROW_ERROR(cu_result, stmt, ...) \ - do { \ - cudaError_t err = (cu_result); \ - if (err != cudaSuccess) { \ - HOLOSCAN_LOG_ERROR("Runtime call {} in line {} of file {} failed with '{}' ({})", \ - #stmt, \ - __LINE__, \ - __FILE__, \ - cudaGetErrorString(err), \ - err); \ - throw std::runtime_error("Error occurred in CUDA runtime API call"); \ - } \ - } while (0) - static constexpr const char* dlpack_capsule_name{"dltensor"}; static constexpr const char* used_dlpack_capsule_name{"used_dltensor"}; } // namespace @@ -470,20 +455,17 @@ std::shared_ptr PyTensor::from_array_interface(const py::object& obj, if (stream_id >= 0 && curr_stream_ptr != stream_ptr) { cudaEvent_t curr_stream_event; - cudaError_t cuda_status; - - cuda_status = cudaEventCreateWithFlags(&curr_stream_event, cudaEventDisableTiming); - CHECK_CUDA_THROW_ERROR(cuda_status, "Failure during call to cudaEventCreateWithFlags"); - - cuda_status = cudaEventRecord(curr_stream_event, stream_ptr); - CHECK_CUDA_THROW_ERROR(cuda_status, "Failure during call to cudaEventRecord"); + HOLOSCAN_CUDA_CALL_THROW_ERROR( + cudaEventCreateWithFlags(&curr_stream_event, 
cudaEventDisableTiming), + "Failure during call to cudaEventCreateWithFlags"); + HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaEventRecord(curr_stream_event, stream_ptr), + "Failure during call to cudaEventRecord"); // Make current stream (curr_stream_ptr) to wait until the given stream (stream_ptr) // is finished. This is a reverse of py_dlpack() method. - cuda_status = cudaStreamWaitEvent(curr_stream_ptr, curr_stream_event, 0); - CHECK_CUDA_THROW_ERROR(cuda_status, "Failure during call to cudaStreamWaitEvent"); - - cuda_status = cudaEventDestroy(curr_stream_event); - CHECK_CUDA_THROW_ERROR(cuda_status, "Failure during call to cudaEventDestroy"); + HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaStreamWaitEvent(curr_stream_ptr, curr_stream_event, 0), + "Failure during call to cudaStreamWaitEvent"); + HOLOSCAN_CUDA_CALL_THROW_ERROR(cudaEventDestroy(curr_stream_event), + "Failure during call to cudaEventDestroy"); } } // Create DLManagedTensor object diff --git a/python/holoscan/decorator.py b/python/holoscan/decorator.py index 762f51b..2d4a51e 100644 --- a/python/holoscan/decorator.py +++ b/python/holoscan/decorator.py @@ -432,7 +432,7 @@ def setup(self, spec: OperatorSpec): def compute(self, op_input, op_output, context): for port_name, arg_map in self.input_mappings.items(): - print(f"input {port_name=}, {arg_map=}") + # print(f"input {port_name=}, {arg_map=}") msg = op_input.receive(port_name) if isinstance(arg_map, str): # print(f"{msg=}") diff --git a/python/holoscan/operators/CMakeLists.txt b/python/holoscan/operators/CMakeLists.txt index 50beb47..9fff38f 100644 --- a/python/holoscan/operators/CMakeLists.txt +++ b/python/holoscan/operators/CMakeLists.txt @@ -24,6 +24,8 @@ add_subdirectory(holoviz) add_subdirectory(inference) add_subdirectory(inference_processor) add_subdirectory(ping_rx) +add_subdirectory(ping_tensor_rx) +add_subdirectory(ping_tensor_tx) add_subdirectory(ping_tx) add_subdirectory(segmentation_postprocessor) add_subdirectory(v4l2_video_capture) diff --git 
a/python/holoscan/operators/__init__.py.in b/python/holoscan/operators/__init__.py.in index d42b7e2..5f9e129 100644 --- a/python/holoscan/operators/__init__.py.in +++ b/python/holoscan/operators/__init__.py.in @@ -24,6 +24,8 @@ holoscan.operators.InferenceOp holoscan.operators.InferenceProcessorOp holoscan.operators.PingRxOp + holoscan.operators.PingTensorRxOp + holoscan.operators.PingTensorTxOp holoscan.operators.PingTxOp holoscan.operators.SegmentationPostprocessorOp holoscan.operators.V4L2VideoCaptureOp @@ -41,6 +43,8 @@ _OPERATOR_MODULES = { "inference": ["InferenceOp"], "inference_processor": ["InferenceProcessorOp"], "ping_rx": ["PingRxOp"], + "ping_tensor_rx": ["PingTensorRxOp"], + "ping_tensor_tx": ["PingTensorTxOp"], "ping_tx": ["PingTxOp"], "segmentation_postprocessor": ["SegmentationPostprocessorOp"], "v4l2_video_capture": ["V4L2VideoCaptureOp"], diff --git a/python/holoscan/operators/aja_source/aja_source.cpp b/python/holoscan/operators/aja_source/aja_source.cpp index d5a6a5b..534531c 100644 --- a/python/holoscan/operators/aja_source/aja_source.cpp +++ b/python/holoscan/operators/aja_source/aja_source.cpp @@ -84,14 +84,15 @@ class PyAJASourceOp : public AJASourceOp { PyAJASourceOp( Fragment* fragment, const py::args& args, const std::string& device = "0"s, const std::variant channel = NTV2Channel::NTV2_CHANNEL1, - uint32_t width = 1920, uint32_t height = 1080, uint32_t framerate = 60, bool rdma = false, - bool enable_overlay = false, + uint32_t width = 1920, uint32_t height = 1080, uint32_t framerate = 60, + bool interlaced = false, bool rdma = false, bool enable_overlay = false, const std::variant overlay_channel = NTV2Channel::NTV2_CHANNEL2, bool overlay_rdma = true, const std::string& name = "aja_source") : AJASourceOp(ArgList{Arg{"device", device}, Arg{"width", width}, Arg{"height", height}, Arg{"framerate", framerate}, + Arg{"interlaced", interlaced}, Arg{"rdma", rdma}, Arg{"enable_overlay", enable_overlay}, Arg{"overlay_rdma", overlay_rdma}}) { @@ 
-145,6 +146,7 @@ PYBIND11_MODULE(_aja_source, m) { uint32_t, bool, bool, + bool, const std::variant, bool, const std::string&>(), @@ -154,13 +156,12 @@ PYBIND11_MODULE(_aja_source, m) { "width"_a = 1920, "height"_a = 1080, "framerate"_a = 60, + "interlaced"_a = false, "rdma"_a = false, "enable_overlay"_a = false, "overlay_channel"_a = NTV2Channel::NTV2_CHANNEL2, "overlay_rdma"_a = true, "name"_a = "aja_source"s, - doc::AJASourceOp::doc_AJASourceOp) - .def("initialize", &AJASourceOp::initialize, doc::AJASourceOp::doc_initialize) - .def("setup", &AJASourceOp::setup, "spec"_a, doc::AJASourceOp::doc_setup); + doc::AJASourceOp::doc_AJASourceOp); } // PYBIND11_MODULE NOLINT } // namespace holoscan::ops diff --git a/python/holoscan/operators/aja_source/pydoc.hpp b/python/holoscan/operators/aja_source/pydoc.hpp index 64aaf9b..3483bbf 100644 --- a/python/holoscan/operators/aja_source/pydoc.hpp +++ b/python/holoscan/operators/aja_source/pydoc.hpp @@ -62,6 +62,9 @@ height : int, optional Height of the video stream. Default value is ``1080``. framerate : int, optional Frame rate of the video stream. Default value is ``60``. +interlaced : bool, optional + Whether or not the video is an interlaced format. Default value is ``False`` + (``"false"`` in YAML). rdma : bool, optional Boolean indicating whether RDMA is enabled. Default value is ``False`` (``"false"`` in YAML). enable_overlay : bool, optional @@ -77,22 +80,6 @@ name : str, optional (constructor only) The name of the operator. Default value is ``"aja_source"``. )doc") -PYDOC(setup, R"doc( -Define the operator specification. - -Parameters ----------- -spec : holoscan.core.OperatorSpec - The operator specification. -)doc") - -PYDOC(initialize, R"doc( -Initialize the operator. - -This method is called only once when the operator is created for the first time, -and uses a light-weight initialization. 
-)doc") - } // namespace holoscan::doc::AJASourceOp #endif /* PYHOLOSCAN_OPERATORS_AJA_SOURCE_PYDOC_HPP */ diff --git a/python/holoscan/operators/bayer_demosaic/bayer_demosaic.cpp b/python/holoscan/operators/bayer_demosaic/bayer_demosaic.cpp index e5694eb..0f702d2 100644 --- a/python/holoscan/operators/bayer_demosaic/bayer_demosaic.cpp +++ b/python/holoscan/operators/bayer_demosaic/bayer_demosaic.cpp @@ -111,8 +111,6 @@ PYBIND11_MODULE(_bayer_demosaic, m) { "generate_alpha"_a = false, "alpha_value"_a = 255, "name"_a = "bayer_demosaic"s, - doc::BayerDemosaicOp::doc_BayerDemosaicOp) - .def("initialize", &BayerDemosaicOp::initialize, doc::BayerDemosaicOp::doc_initialize) - .def("setup", &BayerDemosaicOp::setup, "spec"_a, doc::BayerDemosaicOp::doc_setup); + doc::BayerDemosaicOp::doc_BayerDemosaicOp); } // PYBIND11_MODULE NOLINT } // namespace holoscan::ops diff --git a/python/holoscan/operators/bayer_demosaic/pydoc.hpp b/python/holoscan/operators/bayer_demosaic/pydoc.hpp index bc60ae6..f2d1859 100644 --- a/python/holoscan/operators/bayer_demosaic/pydoc.hpp +++ b/python/holoscan/operators/bayer_demosaic/pydoc.hpp @@ -103,22 +103,6 @@ name : str, optional (constructor only) The name of the operator. Default value is ``"bayer_demosaic"``. )doc") -PYDOC(initialize, R"doc( -Initialize the operator. - -This method is called only once when the operator is created for the first time, -and uses a light-weight initialization. -)doc") - -PYDOC(setup, R"doc( -Define the operator specification. - -Parameters ----------- -spec : holoscan.core.OperatorSpec - The operator specification. 
-)doc") - } // namespace holoscan::doc::BayerDemosaicOp #endif /* PYHOLOSCAN_OPERATORS_BAYER_DEMOSAIC_PYDOC_HPP */ diff --git a/python/holoscan/operators/format_converter/format_converter.cpp b/python/holoscan/operators/format_converter/format_converter.cpp index 43bdf70..d3d6b61 100644 --- a/python/holoscan/operators/format_converter/format_converter.cpp +++ b/python/holoscan/operators/format_converter/format_converter.cpp @@ -131,8 +131,6 @@ PYBIND11_MODULE(_format_converter, m) { "out_channel_order"_a = std::vector{}, "cuda_stream_pool"_a = py::none(), "name"_a = "format_converter"s, - doc::FormatConverterOp::doc_FormatConverterOp) - .def("initialize", &FormatConverterOp::initialize, doc::FormatConverterOp::doc_initialize) - .def("setup", &FormatConverterOp::setup, "spec"_a, doc::FormatConverterOp::doc_setup); + doc::FormatConverterOp::doc_FormatConverterOp); } // PYBIND11_MODULE NOLINT } // namespace holoscan::ops diff --git a/python/holoscan/operators/format_converter/pydoc.hpp b/python/holoscan/operators/format_converter/pydoc.hpp index 101159c..485b0fd 100644 --- a/python/holoscan/operators/format_converter/pydoc.hpp +++ b/python/holoscan/operators/format_converter/pydoc.hpp @@ -134,22 +134,6 @@ name : str, optional (constructor only) The name of the operator. Default value is ``"format_converter"``. )doc") -PYDOC(initialize, R"doc( -Initialize the operator. - -This method is called only once when the operator is created for the first time, -and uses a light-weight initialization. -)doc") - -PYDOC(setup, R"doc( -Define the operator specification. - -Parameters ----------- -spec : holoscan.core.OperatorSpec - The operator specification. 
-)doc") - } // namespace holoscan::doc::FormatConverterOp #endif /* PYHOLOSCAN_OPERATORS_FORMAT_CONVERTER_PYDOC_HPP */ diff --git a/python/holoscan/operators/gxf_codelet/gxf_codelet.cpp b/python/holoscan/operators/gxf_codelet/gxf_codelet.cpp index beea225..fafd553 100644 --- a/python/holoscan/operators/gxf_codelet/gxf_codelet.cpp +++ b/python/holoscan/operators/gxf_codelet/gxf_codelet.cpp @@ -111,10 +111,6 @@ PYBIND11_MODULE(_gxf_codelet, m) { "fragment"_a, "gxf_typename"_a, "name"_a = "gxf_codelet"s, - doc::GXFCodeletOp::doc_GXFCodeletOp) - .def_property_readonly( - "gxf_typename", &GXFCodeletOp::gxf_typename, doc::GXFCodeletOp::doc_gxf_typename) - .def("initialize", &GXFCodeletOp::initialize, doc::GXFCodeletOp::doc_initialize) - .def("setup", &GXFCodeletOp::setup, doc::GXFCodeletOp::doc_setup); + doc::GXFCodeletOp::doc_GXFCodeletOp); } // PYBIND11_MODULE NOLINT } // namespace holoscan::ops diff --git a/python/holoscan/operators/gxf_codelet/pydoc.hpp b/python/holoscan/operators/gxf_codelet/pydoc.hpp index 6171ec1..cce7ced 100644 --- a/python/holoscan/operators/gxf_codelet/pydoc.hpp +++ b/python/holoscan/operators/gxf_codelet/pydoc.hpp @@ -58,32 +58,6 @@ name : str, optional (constructor only) specifically part of the ``nvidia::gxf::Receiver`` nor the ``nvidia::gxf::Transmitter`` components. These parameters can provide further customization and functionality to the operator. )doc") - -PYDOC(gxf_typename, R"doc( -The GXF type name of the resource. - -Returns -------- -str - The GXF type name of the resource -)doc") - -PYDOC(initialize, R"doc( -Initialize the operator. - -This method is called only once when the operator is created for the first time, -and uses a light-weight initialization. -)doc") - -PYDOC(setup, R"doc( -Define the operator specification. - -Parameters ----------- -spec : holoscan.core.OperatorSpec - The operator specification. 
-)doc") - } // namespace holoscan::doc::GXFCodeletOp #endif /* PYHOLOSCAN_OPERATORS_GXF_CODELET_PYDOC_HPP */ diff --git a/python/holoscan/operators/holoviz/__init__.py b/python/holoscan/operators/holoviz/__init__.py index 0885bfa..545683c 100644 --- a/python/holoscan/operators/holoviz/__init__.py +++ b/python/holoscan/operators/holoviz/__init__.py @@ -81,6 +81,16 @@ "b8g8r8a8_srgb": _HolovizOp.ImageFormat.B8G8R8A8_SRGB, "a8b8g8r8_unorm_pack32": _HolovizOp.ImageFormat.A8B8G8R8_UNORM_PACK32, "a8b8g8r8_srgb_pack32": _HolovizOp.ImageFormat.A8B8G8R8_SRGB_PACK32, + "y8u8y8v8_422_unorm": _HolovizOp.ImageFormat.Y8U8Y8V8_422_UNORM, + "u8y8v8y8_422_unorm": _HolovizOp.ImageFormat.U8Y8V8Y8_422_UNORM, + "y8_u8v8_2plane_420_unorm": _HolovizOp.ImageFormat.Y8_U8V8_2PLANE_420_UNORM, + "y8_u8v8_2plane_422_unorm": _HolovizOp.ImageFormat.Y8_U8V8_2PLANE_422_UNORM, + "y8_u8_v8_3plane_420_unorm": _HolovizOp.ImageFormat.Y8_U8_V8_3PLANE_420_UNORM, + "y8_u8_v8_3plane_422_unorm": _HolovizOp.ImageFormat.Y8_U8_V8_3PLANE_422_UNORM, + "y16_u16v16_2plane_420_unorm": _HolovizOp.ImageFormat.Y16_U16V16_2PLANE_420_UNORM, + "y16_u16v16_2plane_422_unorm": _HolovizOp.ImageFormat.Y16_U16V16_2PLANE_422_UNORM, + "y16_u16_v16_3plane_420_unorm": _HolovizOp.ImageFormat.Y16_U16_V16_3PLANE_420_UNORM, + "y16_u16_v16_3plane_422_unorm": _HolovizOp.ImageFormat.Y16_U16_V16_3PLANE_422_UNORM, } _holoviz_str_to_depth_map_render_mode = { @@ -89,6 +99,22 @@ "triangles": _HolovizOp.DepthMapRenderMode.TRIANGLES, } +_holoviz_str_to_yuv_model_conversion = { + "yuv_601": _HolovizOp.YuvModelConversion.YUV_601, + "yuv_709": _HolovizOp.YuvModelConversion.YUV_709, + "yuv_2020": _HolovizOp.YuvModelConversion.YUV_2020, +} + +_holoviz_str_to_yuv_range = { + "itu_full": _HolovizOp.YuvRange.ITU_FULL, + "itu_narrow": _HolovizOp.YuvRange.ITU_NARROW, +} + +_holoviz_str_to_chroma_location = { + "cosited_even": _HolovizOp.ChromaLocation.COSITED_EVEN, + "midpoint": _HolovizOp.ChromaLocation.MIDPOINT, +} + class HolovizOp(_HolovizOp): def 
__init__( @@ -172,6 +198,10 @@ def __init__( "line_width", "point_size", "text", + "yuv_model_conversion", + "yuv_range", + "x_chroma_location", + "y_chroma_location", "depth_map_render_mode", "views", } @@ -221,6 +251,7 @@ def __init__( else: text = [] ispec.text = tensor.get("text", text) + if "depth_map_render_mode" in tensor: depth_map_render_mode = tensor["depth_map_render_mode"] if isinstance(depth_map_render_mode, str): @@ -242,6 +273,82 @@ def __init__( depth_map_render_mode = _HolovizOp.DepthMapRenderMode.POINTS ispec.depth_map_render_mode = depth_map_render_mode + if "yuv_model_conversion" in tensor: + yuv_model_conversion = tensor["yuv_model_conversion"] + if isinstance(yuv_model_conversion, str): + yuv_model_conversion.lower() + if yuv_model_conversion not in _holoviz_str_to_yuv_model_conversion: + raise ValueError( + f"unrecognized yuv_model_conversion name: {yuv_model_conversion}" + ) + yuv_model_conversion = _holoviz_str_to_yuv_model_conversion[ + yuv_model_conversion + ] + elif not isinstance(input_type, _HolovizOp.YuvModelConversion): + raise ValueError( + "value corresponding to key 'yuv_model_conversion' must be either a " + "HolovizOp.YuvModelConversion object or one of the following " + f"strings: {tuple(_holoviz_str_to_yuv_model_conversion.keys())}" + ) + else: + yuv_model_conversion = _HolovizOp.YuvModelConversion.YUV_601 + ispec.yuv_model_conversion = yuv_model_conversion + + if "yuv_range" in tensor: + yuv_range = tensor["yuv_range"] + if isinstance(yuv_range, str): + yuv_range.lower() + if yuv_range not in _holoviz_str_to_yuv_range: + raise ValueError(f"unrecognized yuv_range name: {yuv_range}") + yuv_range = _holoviz_str_to_yuv_range[yuv_range] + elif not isinstance(input_type, _HolovizOp.YuvRange): + raise ValueError( + "value corresponding to key 'yuv_range' must be either a " + "HolovizOp.YuvRange object or one of the following " + f"strings: {tuple(_holoviz_str_to_yuv_range.keys())}" + ) + else: + yuv_range = 
_HolovizOp.YuvRange.ITU_FULL + ispec.yuv_range = yuv_range + + if "x_chroma_location" in tensor: + x_chroma_location = tensor["x_chroma_location"] + if isinstance(x_chroma_location, str): + x_chroma_location.lower() + if x_chroma_location not in _holoviz_str_to_chroma_location: + raise ValueError( + f"unrecognized x_chroma_location name: {x_chroma_location}" + ) + x_chroma_location = _holoviz_str_to_chroma_location[x_chroma_location] + elif not isinstance(input_type, _HolovizOp.ChromaLocation): + raise ValueError( + "value corresponding to key 'x_chroma_location' must be either a " + "HolovizOp.ChromaLocation object or one of the following " + f"strings: {tuple(_holoviz_str_to_chroma_location.keys())}" + ) + else: + x_chroma_location = _HolovizOp.ChromaLocation.COSITED_EVEN + ispec.x_chroma_location = x_chroma_location + + if "y_chroma_location" in tensor: + y_chroma_location = tensor["y_chroma_location"] + if isinstance(y_chroma_location, str): + y_chroma_location.lower() + if y_chroma_location not in _holoviz_str_to_chroma_location: + raise ValueError( + f"unrecognized y_chroma_location name: {y_chroma_location}" + ) + y_chroma_location = _holoviz_str_to_chroma_location[y_chroma_location] + elif not isinstance(input_type, _HolovizOp.ChromaLocation): + raise ValueError( + "value corresponding to key 'y_chroma_location' must be either a " + "HolovizOp.ChromaLocation object or one of the following " + f"strings: {tuple(_holoviz_str_to_chroma_location.keys())}" + ) + else: + y_chroma_location = _HolovizOp.ChromaLocation.COSITED_EVEN + ispec.y_chroma_location = y_chroma_location + ispec.views = tensor.get("views", []) tensor_input_specs.append(ispec) diff --git a/python/holoscan/operators/holoviz/holoviz.cpp b/python/holoscan/operators/holoviz/holoviz.cpp index b620720..3ac1366 100644 --- a/python/holoscan/operators/holoviz/holoviz.cpp +++ b/python/holoscan/operators/holoviz/holoviz.cpp @@ -138,61 +138,58 @@ PYBIND11_MODULE(_holoviz, m) { py::class_> holoviz_op( m, 
"HolovizOp", doc::HolovizOp::doc_HolovizOp); - holoviz_op - .def(py::init, - std::vector, - const std::vector&, - const std::vector>&, - const std::string&, - const std::string&, - uint32_t, - uint32_t, - uint32_t, - bool, - bool, - bool, - bool, - bool, - bool, - bool, - bool, - const std::string&, - const std::array&, - const std::array&, - const std::array&, - const std::string&, - std::shared_ptr, - const std::string&>(), - "fragment"_a, - "allocator"_a, - "receivers"_a = std::vector(), - "tensors"_a = std::vector(), - "color_lut"_a = std::vector>(), - "window_title"_a = "Holoviz", - "display_name"_a = "", - "width"_a = 1920, - "height"_a = 1080, - "framerate"_a = 60, - "use_exclusive_display"_a = false, - "fullscreen"_a = false, - "headless"_a = false, - "framebuffer_srgb"_a = false, - "vsync"_a = false, - "enable_render_buffer_input"_a = false, - "enable_render_buffer_output"_a = false, - "enable_camera_pose_output"_a = false, - "camera_pose_output_type"_a = "projection_matrix", - "camera_eye"_a = std::array{0.f, 0.f, 1.f}, - "camera_look_at"_a = std::array{0.f, 0.f, 0.f}, - "camera_up"_a = std::array{0.f, 1.f, 1.f}, - "font_path"_a = "", - "cuda_stream_pool"_a = py::none(), - "name"_a = "holoviz_op"s, - doc::HolovizOp::doc_HolovizOp) - .def("initialize", &HolovizOp::initialize, doc::HolovizOp::doc_initialize) - .def("setup", &HolovizOp::setup, "spec"_a, doc::HolovizOp::doc_setup); + holoviz_op.def(py::init, + std::vector, + const std::vector&, + const std::vector>&, + const std::string&, + const std::string&, + uint32_t, + uint32_t, + uint32_t, + bool, + bool, + bool, + bool, + bool, + bool, + bool, + bool, + const std::string&, + const std::array&, + const std::array&, + const std::array&, + const std::string&, + std::shared_ptr, + const std::string&>(), + "fragment"_a, + "allocator"_a, + "receivers"_a = std::vector(), + "tensors"_a = std::vector(), + "color_lut"_a = std::vector>(), + "window_title"_a = "Holoviz", + "display_name"_a = "", + "width"_a = 
1920, + "height"_a = 1080, + "framerate"_a = 60, + "use_exclusive_display"_a = false, + "fullscreen"_a = false, + "headless"_a = false, + "framebuffer_srgb"_a = false, + "vsync"_a = false, + "enable_render_buffer_input"_a = false, + "enable_render_buffer_output"_a = false, + "enable_camera_pose_output"_a = false, + "camera_pose_output_type"_a = "projection_matrix", + "camera_eye"_a = std::array{0.f, 0.f, 1.f}, + "camera_look_at"_a = std::array{0.f, 0.f, 0.f}, + "camera_up"_a = std::array{0.f, 1.f, 1.f}, + "font_path"_a = "", + "cuda_stream_pool"_a = py::none(), + "name"_a = "holoviz_op"s, + doc::HolovizOp::doc_HolovizOp); py::enum_(holoviz_op, "InputType") .value("UNKNOWN", HolovizOp::InputType::UNKNOWN) @@ -246,7 +243,30 @@ PYBIND11_MODULE(_holoviz, m) { .value("B8G8R8A8_UNORM", HolovizOp::ImageFormat::B8G8R8A8_UNORM) .value("B8G8R8A8_SRGB", HolovizOp::ImageFormat::B8G8R8A8_SRGB) .value("A8B8G8R8_UNORM_PACK32", HolovizOp::ImageFormat::A8B8G8R8_UNORM_PACK32) - .value("A8B8G8R8_SRGB_PACK32", HolovizOp::ImageFormat::A8B8G8R8_SRGB_PACK32); + .value("A8B8G8R8_SRGB_PACK32", HolovizOp::ImageFormat::A8B8G8R8_SRGB_PACK32) + .value("Y8U8Y8V8_422_UNORM", HolovizOp::ImageFormat::Y8U8Y8V8_422_UNORM) + .value("U8Y8V8Y8_422_UNORM", HolovizOp::ImageFormat::U8Y8V8Y8_422_UNORM) + .value("Y8_U8V8_2PLANE_420_UNORM", HolovizOp::ImageFormat::Y8_U8V8_2PLANE_420_UNORM) + .value("Y8_U8V8_2PLANE_422_UNORM", HolovizOp::ImageFormat::Y8_U8V8_2PLANE_422_UNORM) + .value("Y8_U8_V8_3PLANE_420_UNORM", HolovizOp::ImageFormat::Y8_U8_V8_3PLANE_420_UNORM) + .value("Y8_U8_V8_3PLANE_422_UNORM", HolovizOp::ImageFormat::Y8_U8_V8_3PLANE_422_UNORM) + .value("Y16_U16V16_2PLANE_420_UNORM", HolovizOp::ImageFormat::Y16_U16V16_2PLANE_420_UNORM) + .value("Y16_U16V16_2PLANE_422_UNORM", HolovizOp::ImageFormat::Y16_U16V16_2PLANE_422_UNORM) + .value("Y16_U16_V16_3PLANE_420_UNORM", HolovizOp::ImageFormat::Y16_U16_V16_3PLANE_420_UNORM) + .value("Y16_U16_V16_3PLANE_422_UNORM", 
HolovizOp::ImageFormat::Y16_U16_V16_3PLANE_422_UNORM); + + py::enum_(holoviz_op, "YuvModelConversion") + .value("YUV_601", HolovizOp::YuvModelConversion::YUV_601) + .value("YUV_709", HolovizOp::YuvModelConversion::YUV_709) + .value("YUV_2020", HolovizOp::YuvModelConversion::YUV_2020); + + py::enum_(holoviz_op, "YuvRange") + .value("ITU_FULL", HolovizOp::YuvRange::ITU_FULL) + .value("ITU_NARROW", HolovizOp::YuvRange::ITU_NARROW); + + py::enum_(holoviz_op, "ChromaLocation") + .value("COSITED_EVEN", HolovizOp::ChromaLocation::COSITED_EVEN) + .value("MIDPOINT", HolovizOp::ChromaLocation::MIDPOINT); py::enum_(holoviz_op, "DepthMapRenderMode") .value("POINTS", HolovizOp::DepthMapRenderMode::POINTS) @@ -267,6 +287,10 @@ PYBIND11_MODULE(_holoviz, m) { .def_readwrite("line_width", &HolovizOp::InputSpec::line_width_) .def_readwrite("point_size", &HolovizOp::InputSpec::point_size_) .def_readwrite("text", &HolovizOp::InputSpec::text_) + .def_readwrite("yuv_model_conversion", &HolovizOp::InputSpec::yuv_model_conversion_) + .def_readwrite("yuv_range", &HolovizOp::InputSpec::yuv_range_) + .def_readwrite("x_chroma_location", &HolovizOp::InputSpec::x_chroma_location_) + .def_readwrite("y_chroma_location", &HolovizOp::InputSpec::y_chroma_location_) .def_readwrite("depth_map_render_mode", &HolovizOp::InputSpec::depth_map_render_mode_) .def_readwrite("views", &HolovizOp::InputSpec::views_) .def("description", diff --git a/python/holoscan/operators/holoviz/pydoc.hpp b/python/holoscan/operators/holoviz/pydoc.hpp index c7704e5..b1c29e6 100644 --- a/python/holoscan/operators/holoviz/pydoc.hpp +++ b/python/holoscan/operators/holoviz/pydoc.hpp @@ -250,6 +250,44 @@ The details of the dictionary is as follows: - **text**: array of text strings, used when ``type`` is text (default: ``[]``) - type: ``List[str]`` +- **yuv_model_conversion**: YUV model conversion (default: ``yuv_601``) + + - type: ``str`` + - possible values: + + - **yuv_601**: color model conversion from YUV to RGB defined in 
BT.601 + - **yuv_709**: color model conversion from YUV to RGB defined in BT.709 + - **yuv_2020**: color model conversion from YUV to RGB defined in BT.2020 +- **yuv_range**: YUV range (default: ``itu_full``) + + - type: ``str`` + - possible values: + + - **itu_full**: the full range of the encoded values are valid and interpreted according + to the ITU “full range” quantization rules + - **itu_narrow**: headroom and foot room are reserved in the numerical range of encoded + values, and the remaining values are expanded according to the ITU “narrow range” + quantization rules +- **x_chroma_location**: Location of downsampled chroma component samples relative to the luma + samples. (default: ``cosited_even``) + + - type: ``str`` + - possible values: + + - **cosited_even**: downsampled chroma samples are aligned with luma samples with even + coordinates + - **midpoint**: downsampled chroma samples are located half way between each even + luma sample and the nearest higher odd luma sample +- **y_chroma_location**: Location of downsampled chroma component samples relative to the luma + samples. 
(default: ``cosited_even``) + + - type: ``str`` + - possible values: + + - **cosited_even**: downsampled chroma samples are aligned with luma samples with even + coordinates + - **midpoint**: downsampled chroma samples are located half way between each even + luma sample and the nearest higher odd luma sample - **depth_map_render_mode**: depth map render mode (default: ``points``) - type: ``str`` @@ -311,7 +349,16 @@ The details of the dictionary is as follows: - `"b8g8r8a8_unorm"` - `"b8g8r8a8_srgb"` - `"a8b8g8r8_unorm_pack32"` - - `"a8b8g8r8_srgb_pack32"` + - `"y8u8y8v8_422_unorm"` + - `"u8y8v8y8_422_unorm"` + - `"y8_u8v8_2plane_420_unorm"` + - `"y8_u8v8_2plane_422_unorm"` + - `"y8_u8_v8_3plane_420_unorm"` + - `"y8_u8_v8_3plane_422_unorm"` + - `"y16_u16v16_2plane_420_unorm"` + - `"y16_u16v16_2plane_422_unorm"` + - `"y16_u16_v16_3plane_420_unorm"` + - `"y16_u16_v16_3plane_422_unorm"` When the ``type`` parameter is set to ``color_lut`` the final color is looked up using the values from the ``color_lut`` parameter. For color lookups these image formats are supported @@ -391,22 +438,6 @@ The details of the dictionary is as follows: The rendered framebuffer can be output to ``render_buffer_output``. )doc") -PYDOC(initialize, R"doc( -Initialize the operator. - -This method is called only once when the operator is created for the first time, -and uses a light-weight initialization. -)doc") - -PYDOC(setup, R"doc( -Define the operator specification. - -Parameters ----------- -spec : holoscan.core.OperatorSpec - The operator specification. -)doc") - } // namespace holoscan::doc::HolovizOp namespace holoscan::doc::HolovizOp::Pose3D { @@ -457,6 +488,14 @@ point_size : float Point size for geometry made of points. text : sequence of str Sequence of strings used when type is `HolovizOp.InputType.TEXT`. +yuv_model_conversion : holoscan.operators.HolovizOp.YuvModelConversion + YUV model conversion. +yuv_range : holoscan.operators.HolovizOp.YuvRange + YUV range. 
+x_chroma_location : holoscan.operators.HolovizOp.ChromaLocation + chroma location in x direction for formats which are chroma downsampled in width (420 and 422). +y_chroma_location : holoscan.operators.HolovizOp.ChromaLocation + chroma location in y direction for formats which are chroma downsampled in height (420). depth_map_render_mode : holoscan.operators.HolovizOp.DepthMapRenderMode The depth map render mode. Used only if `type` is `HolovizOp.InputType.DEPTH_MAP` or `HolovizOp.InputType.DEPTH_MAP_COLOR`. diff --git a/python/holoscan/operators/inference/inference.cpp b/python/holoscan/operators/inference/inference.cpp index bcd840d..9da1138 100644 --- a/python/holoscan/operators/inference/inference.cpp +++ b/python/holoscan/operators/inference/inference.cpp @@ -185,53 +185,50 @@ PYBIND11_MODULE(_inference, m) { py::class_> inference_op( m, "InferenceOp", doc::InferenceOp::doc_InferenceOp); - inference_op - .def(py::init, - py::dict, - py::dict, - py::dict, - py::dict, - py::dict, - py::dict, - py::dict, - const std::vector&, - const std::vector&, - bool, - bool, - bool, - bool, - bool, - bool, - bool, - std::shared_ptr, - const std::string&>(), - "fragment"_a, - "backend"_a, - "allocator"_a, - "inference_map"_a, - "model_path_map"_a, - "pre_processor_map"_a, - "device_map"_a = py::dict(), - "temporal_map"_a = py::dict(), - "activation_map"_a = py::dict(), - "backend_map"_a = py::dict(), - "in_tensor_names"_a = std::vector{}, - "out_tensor_names"_a = std::vector{}, - "infer_on_cpu"_a = false, - "parallel_inference"_a = true, - "input_on_cuda"_a = true, - "output_on_cuda"_a = true, - "transmit_on_cuda"_a = true, - "enable_fp16"_a = false, - "is_engine_path"_a = false, - "cuda_stream_pool"_a = py::none(), - "name"_a = "inference"s, - doc::InferenceOp::doc_InferenceOp) - .def("initialize", &InferenceOp::initialize, doc::InferenceOp::doc_initialize) - .def("setup", &InferenceOp::setup, "spec"_a, doc::InferenceOp::doc_setup); + inference_op.def(py::init, + py::dict, 
+ py::dict, + py::dict, + py::dict, + py::dict, + py::dict, + py::dict, + const std::vector&, + const std::vector&, + bool, + bool, + bool, + bool, + bool, + bool, + bool, + std::shared_ptr, + const std::string&>(), + "fragment"_a, + "backend"_a, + "allocator"_a, + "inference_map"_a, + "model_path_map"_a, + "pre_processor_map"_a, + "device_map"_a = py::dict(), + "temporal_map"_a = py::dict(), + "activation_map"_a = py::dict(), + "backend_map"_a = py::dict(), + "in_tensor_names"_a = std::vector{}, + "out_tensor_names"_a = std::vector{}, + "infer_on_cpu"_a = false, + "parallel_inference"_a = true, + "input_on_cuda"_a = true, + "output_on_cuda"_a = true, + "transmit_on_cuda"_a = true, + "enable_fp16"_a = false, + "is_engine_path"_a = false, + "cuda_stream_pool"_a = py::none(), + "name"_a = "inference"s, + doc::InferenceOp::doc_InferenceOp); py::class_(inference_op, "DataMap") .def(py::init<>()) diff --git a/python/holoscan/operators/inference/pydoc.hpp b/python/holoscan/operators/inference/pydoc.hpp index 4455bd1..3c9a1df 100644 --- a/python/holoscan/operators/inference/pydoc.hpp +++ b/python/holoscan/operators/inference/pydoc.hpp @@ -102,22 +102,6 @@ name : str, optional (constructor only) The name of the operator. Default value is ``"inference"``. )doc") -PYDOC(initialize, R"doc( -Initialize the operator. - -This method is called only once when the operator is created for the first time, -and uses a light-weight initialization. -)doc") - -PYDOC(setup, R"doc( -Define the operator specification. - -Parameters ----------- -spec : holoscan.core.OperatorSpec - The operator specification. 
-)doc") - } // namespace holoscan::doc::InferenceOp #endif /* PYHOLOSCAN_OPERATORS_INFERENCE_PYDOC_HPP */ diff --git a/python/holoscan/operators/inference_processor/inference_processor.cpp b/python/holoscan/operators/inference_processor/inference_processor.cpp index 527eec1..1a3c395 100644 --- a/python/holoscan/operators/inference_processor/inference_processor.cpp +++ b/python/holoscan/operators/inference_processor/inference_processor.cpp @@ -151,39 +151,34 @@ PYBIND11_MODULE(_inference_processor, m) { inference_processor_op( m, "InferenceProcessorOp", doc::InferenceProcessorOp::doc_InferenceProcessorOp); - inference_processor_op - .def(py::init, - py::dict, - py::dict, - const std::vector&, - const std::vector&, - bool, - bool, - bool, - bool, - std::shared_ptr, - const std::string&, - const std::string&>(), - "fragment"_a, - "allocator"_a, - "process_operations"_a = py::dict(), - "processed_map"_a = py::dict(), - "in_tensor_names"_a = std::vector{}, - "out_tensor_names"_a = std::vector{}, - "input_on_cuda"_a = false, - "output_on_cuda"_a = false, - "transmit_on_cuda"_a = false, - "disable_transmitter"_a = false, - "cuda_stream_pool"_a = py::none(), - "config_path"_a = ""s, - "name"_a = "postprocessor"s, - doc::InferenceProcessorOp::doc_InferenceProcessorOp) - .def("initialize", - &InferenceProcessorOp::initialize, - doc::InferenceProcessorOp::doc_initialize) - .def("setup", &InferenceProcessorOp::setup, "spec"_a, doc::InferenceProcessorOp::doc_setup); + inference_processor_op.def(py::init, + py::dict, + py::dict, + const std::vector&, + const std::vector&, + bool, + bool, + bool, + bool, + std::shared_ptr, + const std::string&, + const std::string&>(), + "fragment"_a, + "allocator"_a, + "process_operations"_a = py::dict(), + "processed_map"_a = py::dict(), + "in_tensor_names"_a = std::vector{}, + "out_tensor_names"_a = std::vector{}, + "input_on_cuda"_a = false, + "output_on_cuda"_a = false, + "transmit_on_cuda"_a = false, + "disable_transmitter"_a = false, + 
"cuda_stream_pool"_a = py::none(), + "config_path"_a = ""s, + "name"_a = "postprocessor"s, + doc::InferenceProcessorOp::doc_InferenceProcessorOp); py::class_(inference_processor_op, "DataMap") .def(py::init<>()) diff --git a/python/holoscan/operators/inference_processor/pydoc.hpp b/python/holoscan/operators/inference_processor/pydoc.hpp index 171b39a..81df7d6 100644 --- a/python/holoscan/operators/inference_processor/pydoc.hpp +++ b/python/holoscan/operators/inference_processor/pydoc.hpp @@ -83,22 +83,6 @@ name : str, optional (constructor only) The name of the operator. Default value is ``"postprocessor"``. )doc") -PYDOC(initialize, R"doc( -Initialize the operator. - -This method is called only once when the operator is created for the first time, -and uses a light-weight initialization. -)doc") - -PYDOC(setup, R"doc( -Define the operator specification. - -Parameters ----------- -spec : holoscan.core.OperatorSpec - The operator specification. -)doc") - } // namespace holoscan::doc::InferenceProcessorOp #endif /* PYHOLOSCAN_OPERATORS_INFERENCE_PROCESSOR_PYDOC_HPP */ diff --git a/python/holoscan/operators/ping_tensor_rx/CMakeLists.txt b/python/holoscan/operators/ping_tensor_rx/CMakeLists.txt new file mode 100644 index 0000000..dfe0bf0 --- /dev/null +++ b/python/holoscan/operators/ping_tensor_rx/CMakeLists.txt @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +holoscan_pybind11_module(ping_tensor_rx + ping_tensor_rx.cpp +) +target_link_libraries(ping_tensor_rx_python + PUBLIC holoscan::ops::ping_tensor_rx +) \ No newline at end of file diff --git a/python/holoscan/operators/ping_tensor_rx/__init__.py b/python/holoscan/operators/ping_tensor_rx/__init__.py new file mode 100644 index 0000000..35cd862 --- /dev/null +++ b/python/holoscan/operators/ping_tensor_rx/__init__.py @@ -0,0 +1,22 @@ +""" +SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" # noqa: E501 + +import holoscan.core # noqa: F401 + +from ._ping_tensor_rx import PingTensorRxOp + +__all__ = ["PingTensorRxOp"] diff --git a/python/holoscan/operators/ping_tensor_rx/ping_tensor_rx.cpp b/python/holoscan/operators/ping_tensor_rx/ping_tensor_rx.cpp new file mode 100644 index 0000000..bc1090e --- /dev/null +++ b/python/holoscan/operators/ping_tensor_rx/ping_tensor_rx.cpp @@ -0,0 +1,83 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include "../operator_util.hpp" +#include "./pydoc.hpp" + +#include "holoscan/core/fragment.hpp" +#include "holoscan/core/operator.hpp" +#include "holoscan/core/operator_spec.hpp" +#include "holoscan/operators/ping_tensor_rx/ping_tensor_rx.hpp" + +using std::string_literals::operator""s; +using pybind11::literals::operator""_a; + +#define STRINGIFY(x) #x +#define MACRO_STRINGIFY(x) STRINGIFY(x) + +namespace py = pybind11; + +namespace holoscan::ops { + +/* Trampoline class for handling Python kwargs + * + * These add a constructor that takes a Fragment for which to initialize the operator. + * The explicit parameter list and default arguments take care of providing a Pythonic + * kwarg-based interface with appropriate default values matching the operator's + * default parameters in the C++ API `setup` method. + * + * The sequence of events in this constructor is based on Fragment::make_operator + */ +class PyPingTensorRxOp : public holoscan::ops::PingTensorRxOp { + public: + /* Inherit the constructors */ + using PingTensorRxOp::PingTensorRxOp; + + // Define a constructor that fully initializes the object. 
+ PyPingTensorRxOp(Fragment* fragment, const py::args& args, + const std::string& name = "ping_tensor_rx") + : PingTensorRxOp() { + add_positional_condition_and_resource_args(this, args); + name_ = name; + fragment_ = fragment; + spec_ = std::make_shared(fragment); + setup(*spec_.get()); + } +}; + +/* The python module */ + +PYBIND11_MODULE(_ping_tensor_rx, m) { + m.doc() = R"pbdoc( + Holoscan SDK PingTensorRxOp Python Bindings + -------------------------------------------------- + .. currentmodule:: _ping_tensor_rx + )pbdoc"; + + py::class_>( + m, "PingTensorRxOp", doc::PingTensorRxOp::doc_PingTensorRxOp) + .def(py::init(), + "fragment"_a, + "name"_a = "ping_tensor_rx"s, + doc::PingTensorRxOp::doc_PingTensorRxOp); +} // PYBIND11_MODULE NOLINT +} // namespace holoscan::ops diff --git a/python/holoscan/operators/ping_tensor_rx/pydoc.hpp b/python/holoscan/operators/ping_tensor_rx/pydoc.hpp new file mode 100644 index 0000000..240afca --- /dev/null +++ b/python/holoscan/operators/ping_tensor_rx/pydoc.hpp @@ -0,0 +1,46 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PYHOLOSCAN_OPERATORS_PING_TENSOR_RX_PYDOC_HPP +#define PYHOLOSCAN_OPERATORS_PING_TENSOR_RX_PYDOC_HPP + +#include + +#include "../../macros.hpp" + +namespace holoscan::doc::PingTensorRxOp { + +// PyPingTensorRxOp Constructor +PYDOC(PingTensorRxOp, R"doc( +Example tensor receive operator. + +**==Named Inputs==** + + in : nvidia::gxf::TensorMap + A message containing any number of host or device tensors. + +Parameters +---------- +fragment : holoscan.core.Fragment (constructor only) + The fragment that the operator belongs to. +name : str, optional (constructor only) + The name of the operator. Default value is ``"ping_tensor_rx"``. +)doc") + +} // namespace holoscan::doc::PingTensorRxOp + +#endif /* PYHOLOSCAN_OPERATORS_PING_TENSOR_RX_PYDOC_HPP */ diff --git a/python/holoscan/operators/ping_tensor_tx/CMakeLists.txt b/python/holoscan/operators/ping_tensor_tx/CMakeLists.txt new file mode 100644 index 0000000..7e9ae3c --- /dev/null +++ b/python/holoscan/operators/ping_tensor_tx/CMakeLists.txt @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +holoscan_pybind11_module(ping_tensor_tx + ping_tensor_tx.cpp +) +target_link_libraries(ping_tensor_tx_python + PUBLIC holoscan::ops::ping_tensor_tx +) \ No newline at end of file diff --git a/python/holoscan/operators/ping_tensor_tx/__init__.py b/python/holoscan/operators/ping_tensor_tx/__init__.py new file mode 100644 index 0000000..b19eb3c --- /dev/null +++ b/python/holoscan/operators/ping_tensor_tx/__init__.py @@ -0,0 +1,22 @@ +""" +SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" # noqa: E501 + +import holoscan.core # noqa: F401 + +from ._ping_tensor_tx import PingTensorTxOp + +__all__ = ["PingTensorTxOp"] diff --git a/python/holoscan/operators/ping_tensor_tx/ping_tensor_tx.cpp b/python/holoscan/operators/ping_tensor_tx/ping_tensor_tx.cpp new file mode 100644 index 0000000..b3412f9 --- /dev/null +++ b/python/holoscan/operators/ping_tensor_tx/ping_tensor_tx.cpp @@ -0,0 +1,159 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include // for py::dtype +#include +#include // for std::optional, std::variant + +#include +#include +#include +#include +#include + +#include "../operator_util.hpp" +#include "./pydoc.hpp" + +#include "holoscan/core/fragment.hpp" +#include "holoscan/core/operator.hpp" +#include "holoscan/core/operator_spec.hpp" +#include "holoscan/operators/ping_tensor_tx/ping_tensor_tx.hpp" +#include "holoscan/core/resources/gxf/allocator.hpp" + +using std::string_literals::operator""s; +using pybind11::literals::operator""_a; + +#define STRINGIFY(x) #x +#define MACRO_STRINGIFY(x) STRINGIFY(x) + +namespace py = pybind11; + +namespace holoscan::ops { + +/* Trampoline class for handling Python kwargs + * + * These add a constructor that takes a Fragment for which to initialize the operator. + * The explicit parameter list and default arguments take care of providing a Pythonic + * kwarg-based interface with appropriate default values matching the operator's + * default parameters in the C++ API `setup` method. + * + * The sequence of events in this constructor is based on Fragment::make_operator + */ +class PyPingTensorTxOp : public holoscan::ops::PingTensorTxOp { + public: + /* Inherit the constructors */ + using PingTensorTxOp::PingTensorTxOp; + + // Define a constructor that fully initializes the object. 
+ PyPingTensorTxOp(Fragment* fragment, const py::args& args, + std::optional> allocator = std::nullopt, + const std::string& storage_type = "system"s, + std::optional batch_size = std::nullopt, int32_t rows = 32, + std::optional columns = 64, + std::optional channels = std::nullopt, + const std::variant dtype = "uint8_t", + const std::string& tensor_name = "tensor", + const std::string& name = "ping_tensor_tx") + : PingTensorTxOp(ArgList{Arg{"storage_type", storage_type}, + Arg{"rows", rows}, + Arg{"tensor_name", tensor_name}}) { + add_positional_condition_and_resource_args(this, args); + if (allocator.has_value()) { this->add_arg(Arg{"allocator", allocator.value()}); } + if (batch_size.has_value()) { this->add_arg(Arg{"batch_size", batch_size.value()}); } + if (columns.has_value()) { this->add_arg(Arg{"columns", columns.value()}); } + if (channels.has_value()) { this->add_arg(Arg{"channels", channels.value()}); } + if (std::holds_alternative(dtype)) { + this->add_arg(Arg("data_type", std::get(dtype))); + } else { + auto dt = std::get(dtype); + std::string data_type; + std::string dtype_name = dt.attr("name").cast(); + if (dtype_name == "float16") { // currently promoting float16 scalars to float + data_type = "float"; + } else if (dtype_name == "float32") { + data_type = "float"; + } else if (dtype_name == "float64") { + data_type = "double"; + } else if (dtype_name == "bool") { + data_type = "uint8_t"; + } else if (dtype_name == "int8") { + data_type = "int8_t"; + } else if (dtype_name == "int16") { + data_type = "int16_t"; + } else if (dtype_name == "int32") { + data_type = "int32_t"; + } else if (dtype_name == "int64") { + data_type = "int64_t"; + } else if (dtype_name == "uint8") { + data_type = "uint8_t"; + } else if (dtype_name == "uint16") { + data_type = "uint16_t"; + } else if (dtype_name == "uint32") { + data_type = "uint32_t"; + } else if (dtype_name == "uint64") { + data_type = "uint64_t"; + } else if (dtype_name == "complex64") { + data_type = 
"complex"; + } else if (dtype_name == "complex128") { + data_type = "complex"; + } else { + throw std::runtime_error(fmt::format("unsupported numpy dtype with name: {}", dtype_name)); + } + this->add_arg(Arg("data_type", data_type)); + } + name_ = name; + fragment_ = fragment; + spec_ = std::make_shared(fragment); + setup(*spec_.get()); + } +}; + +/* The python module */ + +PYBIND11_MODULE(_ping_tensor_tx, m) { + m.doc() = R"pbdoc( + Holoscan SDK PingTensorTxOp Python Bindings + -------------------------------------------------- + .. currentmodule:: _ping_tensor_tx + )pbdoc"; + + py::class_>( + m, "PingTensorTxOp", doc::PingTensorTxOp::doc_PingTensorTxOp) + .def(py::init>, + const std::string&, + std::optional, + int32_t, + std::optional, + std::optional, + const std::variant, + const std::string&, + const std::string&>(), + "fragment"_a, + "allocator"_a = py::none(), + "storage_type"_a = "system"s, + "batch_size"_a = py::none(), + "rows"_a = 32, + "columns"_a = 64, + "channels"_a = py::none(), + "dtype"_a = "uint8_t"s, + "tensor_name"_a = "tensor"s, + "name"_a = "ping_tensor_tx"s, + doc::PingTensorTxOp::doc_PingTensorTxOp); +} // PYBIND11_MODULE NOLINT +} // namespace holoscan::ops diff --git a/python/holoscan/operators/ping_tensor_tx/pydoc.hpp b/python/holoscan/operators/ping_tensor_tx/pydoc.hpp new file mode 100644 index 0000000..20748da --- /dev/null +++ b/python/holoscan/operators/ping_tensor_tx/pydoc.hpp @@ -0,0 +1,77 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PYHOLOSCAN_OPERATORS_PING_TENSOR_TX_PYDOC_HPP +#define PYHOLOSCAN_OPERATORS_PING_TENSOR_TX_PYDOC_HPP + +#include + +#include "../../macros.hpp" + +namespace holoscan::doc::PingTensorTxOp { + +// PingTensorTxOp Constructor +PYDOC(PingTensorTxOp, R"doc( +Tensor generation operator intended for use in tests and examples. + +The contents of the generated tensor are uninitialized. + +**==Named Outputs==** + + output : nvidia::gxf::Tensor + A message containing a single tensor with a specified shape, storage type, + data type and name. + +Parameters +---------- +fragment : holoscan.core.Fragment (constructor only) + The fragment that the operator belongs to. +allocator : holoscan.resources.Allocator, optional + The allocator used to allocate the tensor output. If unspecified a + ``holoscan.resources.UnboundedAllocator`` is used. +storage_type : {"host", "device", "system"}, optional + The memory storage type for the generated tensor. Here, `"system"` corresponds to CPU memory + while `"host"` corresponds to pinned host memory allocated using CUDA's `cudaMallocHost`. + Finally, `"device"` corresponds to GPU device memory allocated via `cudaMalloc`. +batch_size : int or None, optional + Size of the batch dimension (default: 0). The tensor shape will be + ([batch], rows, [columns], [channels]) where [] around a dimension indicates that it is only + present if the corresponding parameter has a size > 0. If 0 or ``None``, no batch dimension + will be present. +rows : int, optional + The number of rows in the generated tensor. 
+columns : int, optional + The number of columns in the generated tensor. If 0 or ``None``, no columns dimension will be + present. +channels : int, optional + The number of channels in the generated tensor. If 0 or ``None``, no channels dimension will be + present. +dtype : str or numpy.dtype, optional + The data type used by the tensor. Should be a string matching one of the following C++ types + {"int8_t", "int16_t", "int32_t", "int64_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t", + "float", "double", "complex", "complex"}. Alternatively, a ``numpy.dtype`` + object can be provided to indicate the desired data type. +tensor_name : str, optional + The name of the output tensor. +name : str, optional (constructor only) + The name of the operator. Default value is ``"ping_tensor_tx"``. + +)doc") + +} // namespace holoscan::doc::PingTensorTxOp + +#endif /* PYHOLOSCAN_OPERATORS_PING_TENSOR_TX_PYDOC_HPP */ diff --git a/python/holoscan/operators/segmentation_postprocessor/pydoc.hpp b/python/holoscan/operators/segmentation_postprocessor/pydoc.hpp index 3d60eba..ab2ec63 100644 --- a/python/holoscan/operators/segmentation_postprocessor/pydoc.hpp +++ b/python/holoscan/operators/segmentation_postprocessor/pydoc.hpp @@ -65,22 +65,6 @@ name : str, optional (constructor only) The name of the operator. Default value is ``"segmentation_postprocessor"``. )doc") -PYDOC(initialize, R"doc( -Initialize the operator. - -This method is called only once when the operator is created for the first time, -and uses a light-weight initialization. -)doc") - -PYDOC(setup, R"doc( -Define the operator specification. - -Parameters ----------- -spec : holoscan.core.OperatorSpec - The operator specification. 
-)doc") - } // namespace holoscan::doc::SegmentationPostprocessorOp #endif /* PYHOLOSCAN_OPERATORS_SEGMENTATION_POSTPROCESSOR_PYDOC_HPP */ diff --git a/python/holoscan/operators/segmentation_postprocessor/segmentation_postprocessor.cpp b/python/holoscan/operators/segmentation_postprocessor/segmentation_postprocessor.cpp index ee5e8a3..e6f2854 100644 --- a/python/holoscan/operators/segmentation_postprocessor/segmentation_postprocessor.cpp +++ b/python/holoscan/operators/segmentation_postprocessor/segmentation_postprocessor.cpp @@ -106,10 +106,6 @@ PYBIND11_MODULE(_segmentation_postprocessor, m) { "data_format"_a = "hwc"s, "cuda_stream_pool"_a = py::none(), "name"_a = "segmentation_postprocessor"s, - doc::SegmentationPostprocessorOp::doc_SegmentationPostprocessorOp) - .def("setup", - &SegmentationPostprocessorOp::setup, - "spec"_a, - doc::SegmentationPostprocessorOp::doc_setup); + doc::SegmentationPostprocessorOp::doc_SegmentationPostprocessorOp); } // PYBIND11_MODULE NOLINT } // namespace holoscan::ops diff --git a/python/holoscan/operators/v4l2_video_capture/pydoc.hpp b/python/holoscan/operators/v4l2_video_capture/pydoc.hpp index 312228f..8a6689b 100644 --- a/python/holoscan/operators/v4l2_video_capture/pydoc.hpp +++ b/python/holoscan/operators/v4l2_video_capture/pydoc.hpp @@ -33,12 +33,13 @@ Operator to get a video stream from a V4L2 source. Inputs a video stream from a V4L2 node, including USB cameras and HDMI IN. - Input stream is on host. If no pixel format is specified in the yaml configuration file, the - pixel format will be automatically selected. However, only ``AB24``, ``YUYV``, and ``MJPG`` are then - supported. + pixel format will be automatically selected. However, only `AB24`, `YUYV`, `MJPG`, and `RGB3` + are then supported. If a pixel format is specified in the yaml file, then this format will be used. However, note - that the operator then expects that this format can be encoded as RGBA32. If not, the behavior - is undefined. 
- - Output stream is on host. Always RGBA32 at this time. + if `pass_through` is `false` that the operator then expects that this format can be encoded as + RGBA32. If not, the behavior is undefined. + - Output stream is on host. If `pass_through` is `false` (the default) the video buffer is + converted to RGBA32, else output the input video buffer unmodified. Use ``holoscan.operators.FormatConverterOp`` to move data from the host to a GPU device. @@ -81,6 +82,9 @@ num_buffers : int, optional pixel_format : str Video stream pixel format (little endian four character code (fourcc)). Default value is ``"auto"``. +pass_through : bool + If set, pass through the input buffer to the output unmodified, else convert to RGBA32. + Default value is ``False``. name : str, optional (constructor only) The name of the operator. Default value is ``"v4l2_video_capture"``. exposure_time : int, optional @@ -102,22 +106,6 @@ gain : int, optional then used to set V4L2_CID_GAIN. )doc") -PYDOC(setup, R"doc( -Define the operator specification. - -Parameters ----------- -spec : ``holoscan.core.OperatorSpec`` - The operator specification. -)doc") - -PYDOC(initialize, R"doc( -Initialize the operator. - -This method is called only once when the operator is created for the first time, -and uses a light-weight initialization.
-)doc") - } // namespace holoscan::doc::V4L2VideoCaptureOp #endif /* PYHOLOSCAN_OPERATORS_V4L2_VIDEO_CAPTURE_PYDOC_HPP */ diff --git a/python/holoscan/operators/v4l2_video_capture/v4l2_video_capture.cpp b/python/holoscan/operators/v4l2_video_capture/v4l2_video_capture.cpp index de14fff..ab49897 100644 --- a/python/holoscan/operators/v4l2_video_capture/v4l2_video_capture.cpp +++ b/python/holoscan/operators/v4l2_video_capture/v4l2_video_capture.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include "../operator_util.hpp" @@ -63,6 +64,7 @@ class PyV4L2VideoCaptureOp : public V4L2VideoCaptureOp { const std::string& device = "/dev/video0"s, uint32_t width = 0, uint32_t height = 0, uint32_t num_buffers = 4, const std::string& pixel_format = "auto", + bool pass_through = false, const std::string& name = "v4l2_video_capture", std::optional exposure_time = std::nullopt, std::optional gain = std::nullopt) @@ -71,13 +73,10 @@ class PyV4L2VideoCaptureOp : public V4L2VideoCaptureOp { Arg{"width", width}, Arg{"height", height}, Arg{"numBuffers", num_buffers}, - Arg{"pixel_format", pixel_format}}) { - if (exposure_time.has_value()) { - this->add_arg(Arg{"exposure_time", exposure_time.value() }); - } - if (gain.has_value()) { - this->add_arg(Arg{"gain", gain.value() }); - } + Arg{"pixel_format", pixel_format}, + Arg{"pass_through", pass_through}}) { + if (exposure_time.has_value()) { this->add_arg(Arg{"exposure_time", exposure_time.value()}); } + if (gain.has_value()) { this->add_arg(Arg{"gain", gain.value()}); } add_positional_condition_and_resource_args(this, args); name_ = name; fragment_ = fragment; @@ -106,6 +105,7 @@ PYBIND11_MODULE(_v4l2_video_capture, m) { uint32_t, uint32_t, const std::string&, + bool, const std::string&, std::optional, std::optional>(), @@ -116,11 +116,10 @@ PYBIND11_MODULE(_v4l2_video_capture, m) { "height"_a = 0, "num_buffers"_a = 4, "pixel_format"_a = "auto"s, + "pass_through"_a = false, "name"_a = "v4l2_video_capture"s, "exposure_time"_a 
= py::none(), "gain"_a = py::none(), - doc::V4L2VideoCaptureOp::doc_V4L2VideoCaptureOp) - .def("initialize", &V4L2VideoCaptureOp::initialize, doc::V4L2VideoCaptureOp::doc_initialize) - .def("setup", &V4L2VideoCaptureOp::setup, "spec"_a, doc::V4L2VideoCaptureOp::doc_setup); + doc::V4L2VideoCaptureOp::doc_V4L2VideoCaptureOp); } // PYBIND11_MODULE NOLINT } // namespace holoscan::ops diff --git a/python/holoscan/operators/video_stream_recorder/pydoc.hpp b/python/holoscan/operators/video_stream_recorder/pydoc.hpp index 919158c..8d803ea 100644 --- a/python/holoscan/operators/video_stream_recorder/pydoc.hpp +++ b/python/holoscan/operators/video_stream_recorder/pydoc.hpp @@ -50,22 +50,6 @@ name : str, optional (constructor only) The name of the operator. Default value is ``"video_stream_recorder"``. )doc") -PYDOC(initialize, R"doc( -Initialize the operator. - -This method is called only once when the operator is created for the first time, -and uses a light-weight initialization. -)doc") - -PYDOC(setup, R"doc( -Define the operator specification. - -Parameters ----------- -spec : holoscan.core.OperatorSpec - The operator specification. 
-)doc") - } // namespace holoscan::doc::VideoStreamRecorderOp #endif /* PYHOLOSCAN_OPERATORS_VIDEO_STREAM_RECORDER_PYDOC_HPP */ diff --git a/python/holoscan/operators/video_stream_recorder/video_stream_recorder.cpp b/python/holoscan/operators/video_stream_recorder/video_stream_recorder.cpp index 55b5a12..120f9de 100644 --- a/python/holoscan/operators/video_stream_recorder/video_stream_recorder.cpp +++ b/python/holoscan/operators/video_stream_recorder/video_stream_recorder.cpp @@ -92,11 +92,7 @@ PYBIND11_MODULE(_video_stream_recorder, m) { "directory"_a, "basename"_a, "flush_on_tick"_a = false, - "name"_a = "recorder"s, - doc::VideoStreamRecorderOp::doc_VideoStreamRecorderOp) - .def("initialize", - &VideoStreamRecorderOp::initialize, - doc::VideoStreamRecorderOp::doc_initialize) - .def("setup", &VideoStreamRecorderOp::setup, "spec"_a, doc::VideoStreamRecorderOp::doc_setup); + "name"_a = "video_stream_recorder"s, + doc::VideoStreamRecorderOp::doc_VideoStreamRecorderOp); } // PYBIND11_MODULE NOLINT } // namespace holoscan::ops diff --git a/python/holoscan/operators/video_stream_replayer/pydoc.hpp b/python/holoscan/operators/video_stream_replayer/pydoc.hpp index 118daef..56d21ef 100644 --- a/python/holoscan/operators/video_stream_replayer/pydoc.hpp +++ b/python/holoscan/operators/video_stream_replayer/pydoc.hpp @@ -59,26 +59,18 @@ count : int, optional Number of frame counts to playback. If zero value is specified, it is ignored. If the count is less than the number of frames in the video, it would finish early. Default value is ``0``. +allocator : holoscan.core.Allocator + Allocator used to tensor memory. Currently, only the ``holoscan.resources.UnboundedAllocator`` + is supported. The default value of ``None`` will lead to use of a + ``holoscan.resources.UnboundedAllocator``. +entity_serializer : holoscan.core.EntitySerializer + The entity serializer used for deserialization. 
The default value of ``None`` + will lead to use of a default ``holoscan.resources.StdEntitySerializer``. If this argument is + specified, then the `allocator` argument is ignored. name : str, optional (constructor only) The name of the operator. Default value is ``"video_stream_replayer"``. )doc") -PYDOC(initialize, R"doc( -Initialize the operator. - -This method is called only once when the operator is created for the first time, -and uses a light-weight initialization. -)doc") - -PYDOC(setup, R"doc( -Define the operator specification. - -Parameters ----------- -spec : holoscan.core.OperatorSpec - The operator specification. -)doc") - } // namespace holoscan::doc::VideoStreamReplayerOp #endif /* PYHOLOSCAN_OPERATORS_VIDEO_STREAM_REPLAYER_PYDOC_HPP */ diff --git a/python/holoscan/operators/video_stream_replayer/video_stream_replayer.cpp b/python/holoscan/operators/video_stream_replayer/video_stream_replayer.cpp index 73712be..6f9c815 100644 --- a/python/holoscan/operators/video_stream_replayer/video_stream_replayer.cpp +++ b/python/holoscan/operators/video_stream_replayer/video_stream_replayer.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include "../operator_util.hpp" @@ -26,6 +27,7 @@ #include "holoscan/core/fragment.hpp" #include "holoscan/core/operator.hpp" #include "holoscan/core/operator_spec.hpp" +#include "holoscan/core/resources/gxf/allocator.hpp" #include "holoscan/operators/video_stream_replayer/video_stream_replayer.hpp" using std::string_literals::operator""s; @@ -54,11 +56,13 @@ class PyVideoStreamReplayerOp : public VideoStreamReplayerOp { using VideoStreamReplayerOp::VideoStreamReplayerOp; // Define a constructor that fully initializes the object. 
- PyVideoStreamReplayerOp(Fragment* fragment, const py::args& args, const std::string& directory, - const std::string& basename, size_t batch_size = 1UL, - bool ignore_corrupted_entities = true, float frame_rate = 0.f, - bool realtime = true, bool repeat = false, uint64_t count = 0UL, - const std::string& name = "video_stream_replayer") + PyVideoStreamReplayerOp( + Fragment* fragment, const py::args& args, const std::string& directory, + const std::string& basename, size_t batch_size = 1UL, bool ignore_corrupted_entities = true, + float frame_rate = 0.f, bool realtime = true, bool repeat = false, uint64_t count = 0UL, + std::optional> allocator = std::nullopt, + std::optional> entity_serializer = std::nullopt, + const std::string& name = "video_stream_replayer") : VideoStreamReplayerOp(ArgList{Arg{"directory", directory}, Arg{"basename", basename}, Arg{"batch_size", batch_size}, @@ -68,6 +72,10 @@ class PyVideoStreamReplayerOp : public VideoStreamReplayerOp { Arg{"repeat", repeat}, Arg{"count", count}}) { add_positional_condition_and_resource_args(this, args); + if (allocator.has_value()) { this->add_arg(Arg{"allocator", allocator.value()}); } + if (entity_serializer.has_value()) { + this->add_arg(Arg{"entity_serializer", entity_serializer.value()}); + } name_ = name; fragment_ = fragment; spec_ = std::make_shared(fragment); @@ -99,6 +107,8 @@ PYBIND11_MODULE(_video_stream_replayer, m) { bool, bool, uint64_t, + std::optional>, + std::optional>, const std::string&>(), "fragment"_a, "directory"_a, @@ -109,11 +119,9 @@ PYBIND11_MODULE(_video_stream_replayer, m) { "realtime"_a = true, "repeat"_a = false, "count"_a = 0UL, + "allocator"_a = py::none(), + "entity_serializer"_a = py::none(), "name"_a = "video_stream_replayer"s, - doc::VideoStreamReplayerOp::doc_VideoStreamReplayerOp) - .def("initialize", - &VideoStreamReplayerOp::initialize, - doc::VideoStreamReplayerOp::doc_initialize) - .def("setup", &VideoStreamReplayerOp::setup, "spec"_a, 
doc::VideoStreamReplayerOp::doc_setup); + doc::VideoStreamReplayerOp::doc_VideoStreamReplayerOp); } // PYBIND11_MODULE NOLINT } // namespace holoscan::ops diff --git a/python/requirements.txt b/python/requirements.txt index b1ec558..3414512 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,4 +1,4 @@ -pip>=20.3 +pip>22.0.2 cupy-cuda12x==12.2 cloudpickle==2.2.1 python-on-whales==0.60.1 diff --git a/python/tests/cli/unit/common/package-source.json b/python/tests/cli/unit/common/package-source.json index 7b4199c..c4352b3 100644 --- a/python/tests/cli/unit/common/package-source.json +++ b/python/tests/cli/unit/common/package-source.json @@ -1,43 +1,32 @@ { - "1.0.3": { + "2.4.0": { "holoscan": { - "debian-packages": { - "linux/amd64": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/holoscan_1.0.3.0-1_amd64.deb", - "linux/arm64": { - "igpu": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/holoscan_1.0.3.0-1_arm64.deb", - "dgpu": "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/sbsa/holoscan_1.0.3.0-1_arm64.deb" - } - }, + "debian-version": "2.3.0.1-1", + "wheel-version": "2.3.0", "base-images": { - "igpu": { - "jetson-agx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu" - }, - "dgpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-dgpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-dgpu" - }, - "cpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-dgpu" - } + "dgpu": "nvcr.io/nvidia/cuda:12.2.2-runtime-ubuntu22.04", + "igpu": "nvcr.io/nvidia/tensorrt:23.12-py3-igpu" }, "build-images": { "igpu": { - "jetson-agx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-igpu" + "jetson-agx-orin-devkit": 
"nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-igpu", + "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-igpu", + "sbsa": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-igpu" }, "dgpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-dgpu", - "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-dgpu" + "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu", + "igx-orin-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu", + "sbsa": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu", + "clara-agx-devkit": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu" }, "cpu": { - "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v1.0.3-dgpu" + "x64-workstation": "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu" } } }, "health-probes": { - "linux/amd64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.24/grpc_health_probe-linux-amd64", - "linux/arm64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.24/grpc_health_probe-linux-arm64" + "linux/amd64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.19/grpc_health_probe-linux-amd64", + "linux/arm64": "https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.19/grpc_health_probe-linux-arm64" } } } \ No newline at end of file diff --git a/python/tests/cli/unit/common/test_argparse_types.py b/python/tests/cli/unit/common/test_argparse_types.py index 2a7e3c0..82fe164 100644 --- a/python/tests/cli/unit/common/test_argparse_types.py +++ b/python/tests/cli/unit/common/test_argparse_types.py @@ -1,21 +1,22 @@ """ - SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - SPDX-License-Identifier: Apache-2.0 +SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+SPDX-License-Identifier: Apache-2.0 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. """ # noqa: E501 import argparse +import os import pathlib from pathlib import PosixPath from typing import List @@ -63,7 +64,8 @@ def test_dir_exists_and_isdir_and_expands_user_dir(self, monkeypatch): result = valid_dir_path("~/this/is/some/path") assert type(result) is PosixPath - assert str(result).startswith("/home") + + assert str(result).startswith(os.path.expanduser("~")) class TestValidExistingDirPath: diff --git a/python/tests/cli/unit/common/test_artifact_source.py b/python/tests/cli/unit/common/test_artifact_source.py index 7731ea9..26abfa3 100644 --- a/python/tests/cli/unit/common/test_artifact_source.py +++ b/python/tests/cli/unit/common/test_artifact_source.py @@ -1,18 +1,18 @@ """ - SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
- SPDX-License-Identifier: Apache-2.0 +SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +SPDX-License-Identifier: Apache-2.0 - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 +http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
""" # noqa: E501 from pathlib import Path @@ -20,7 +20,6 @@ import pytest from holoscan.cli.common.artifact_sources import ArtifactSources -from holoscan.cli.common.enum_types import Arch, PlatformConfiguration class TestArtifactSource: @@ -30,10 +29,6 @@ def _init(self) -> None: source_file_sample = current_file_path / "./package-source.json" self._artifact_source.load(str(source_file_sample)) - def test_loads_from_edge(self, monkeypatch): - artifact_source = ArtifactSources() - artifact_source.download_manifest() - def test_loads_invalid_file(self, monkeypatch): monkeypatch.setattr(Path, "read_text", lambda x: "{}") @@ -43,26 +38,22 @@ def test_loads_invalid_file(self, monkeypatch): with pytest.raises(FileNotFoundError): artifact_sources.load(str(source_file_sample)) - @pytest.mark.parametrize( - "arch,platform_config", - [ - (Arch.amd64, PlatformConfiguration.dGPU), - (Arch.arm64, PlatformConfiguration.dGPU), - (Arch.arm64, PlatformConfiguration.iGPU), - ], - ) - def test_debian_package(self, arch, platform_config): + def test_debian_package_version(self): + self._init() + assert self._artifact_source.debian_package_version("2.4.0") is not None + + def test_debian_package_version_missing(self): self._init() - assert self._artifact_source.debian_packages("1.0.3", arch, platform_config) is not None + assert self._artifact_source.debian_package_version("2.4.1") is None def test_base_images(self): self._init() - assert self._artifact_source.base_images("1.0.3") is not None + assert self._artifact_source.base_image("2.4.0") is not None def test_build_images(self): self._init() - assert self._artifact_source.build_images("1.0.3") is not None + assert self._artifact_source.build_images("2.4.0") is not None def test_health_probe(self): self._init() - assert self._artifact_source.health_probe("1.0.3") is not None + assert self._artifact_source.health_probe("2.4.0") is not None diff --git a/python/tests/cli/unit/conftest.py b/python/tests/cli/unit/conftest.py new file 
mode 100644 index 0000000..7242416 --- /dev/null +++ b/python/tests/cli/unit/conftest.py @@ -0,0 +1,27 @@ +""" + SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + + +import pytest + + +@pytest.fixture(autouse=True) +def configure_version(monkeypatch): # noqa: PT004 + with pytest.MonkeyPatch.context() as mp: + version = "2.4.0" + mp.setattr("holoscan.cli.common.artifact_sources.holoscan_version_string", version) + yield mp diff --git a/python/tests/cli/unit/packager/test_arguments.py b/python/tests/cli/unit/packager/test_arguments.py index fdea964..3526816 100644 --- a/python/tests/cli/unit/packager/test_arguments.py +++ b/python/tests/cli/unit/packager/test_arguments.py @@ -16,7 +16,6 @@ """ # noqa: E501 import pathlib -import platform from argparse import Namespace import pytest @@ -47,6 +46,7 @@ def _setup(self) -> None: self.input_args.source = pathlib.Path("/path/to/source.json") self.input_args.platform = Platform.X64Workstation self.input_args.platform_config = PlatformConfiguration.dGPU + self.input_args.includes = [] self.source_load_called = False @@ -91,6 +91,10 @@ def _setup_mocks(self, monkeypatch): "holoscan.cli.packager.config_reader.PackageBuildParameters._set_app_command", lambda x: None, ) + monkeypatch.setattr( + "holoscan.cli.common.artifact_sources.ArtifactSources.download_manifest", + lambda x: None, + ) def 
mock_artifact_load(x, y): self.source_load_called = True @@ -108,6 +112,7 @@ def test_input_args(self, monkeypatch): assert args.build_parameters.docs == self.input_args.docs assert args.build_parameters.docs_dir == DefaultValues.HOLOSCAN_DOCS_DIR assert args.build_parameters.logs_dir == DefaultValues.HOLOSCAN_LOGS_DIR + assert args.build_parameters.app_config_file_path == self.input_args.config assert ( args.build_parameters.full_input_path == DefaultValues.WORK_DIR / DefaultValues.INPUT_DIR @@ -116,16 +121,6 @@ def test_input_args(self, monkeypatch): args.build_parameters.full_output_path == DefaultValues.WORK_DIR / DefaultValues.OUTPUT_DIR ) - assert ( - args.build_parameters.cuda_deb_arch == "sbsa" - if platform.processor() == "aarch64" - else "x86_64" - ) - assert ( - args.build_parameters.holoscan_deb_arch == "arm64" - if platform.processor() == "aarch64" - else "amd64" - ) assert args.build_parameters.input_dir == DefaultValues.INPUT_DIR assert args.build_parameters.models_dir == DefaultValues.MODELS_DIR assert args.build_parameters.output_dir == DefaultValues.OUTPUT_DIR diff --git a/python/tests/cli/unit/packager/test_parameters.py b/python/tests/cli/unit/packager/test_parameters.py index ab0b904..b7f5396 100644 --- a/python/tests/cli/unit/packager/test_parameters.py +++ b/python/tests/cli/unit/packager/test_parameters.py @@ -17,31 +17,38 @@ import os import pathlib -import platform import pytest from holoscan.cli.common.constants import Constants, DefaultValues -from holoscan.cli.common.enum_types import ApplicationType +from holoscan.cli.common.enum_types import ( + ApplicationType, + Platform, + PlatformConfiguration, +) from holoscan.cli.common.exceptions import UnknownApplicationTypeError -from holoscan.cli.packager.parameters import PackageBuildParameters +from holoscan.cli.packager.parameters import PackageBuildParameters, PlatformParameters -class TestPackageBuildParameters: +class TestPlatformParameters: def test_with_aarch64(self, monkeypatch): - 
monkeypatch.setattr(platform, "processor", lambda: "aarch64") - - build_parameters = PackageBuildParameters() + build_parameters = PlatformParameters( + Platform.IGXOrinDevIt, PlatformConfiguration.iGPU, "my-container-app", "1.2.3" + ) assert build_parameters.holoscan_deb_arch == "arm64" assert build_parameters.cuda_deb_arch == "sbsa" + assert build_parameters.target_arch == "aarch64" def test_with_x64(self, monkeypatch): - monkeypatch.setattr(platform, "processor", lambda: "x86_64") - - build_parameters = PackageBuildParameters() + build_parameters = PlatformParameters( + Platform.X64Workstation, PlatformConfiguration.dGPU, "my-container-app", "1.2.3" + ) assert build_parameters.holoscan_deb_arch == "amd64" assert build_parameters.cuda_deb_arch == "x86_64" + assert build_parameters.target_arch == "x86_64" + +class TestPackageBuildParameters: def test_set_application_python_dir(self, monkeypatch): input_dir = pathlib.Path("/path/to/my/python/app/dir") diff --git a/python/tests/cli/unit/packager/test_platforms.py b/python/tests/cli/unit/packager/test_platforms.py new file mode 100644 index 0000000..ee00486 --- /dev/null +++ b/python/tests/cli/unit/packager/test_platforms.py @@ -0,0 +1,458 @@ +""" + SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" # noqa: E501 + +import platform as platform_lib +import tempfile +from argparse import Namespace +from pathlib import Path + +import pytest +from packaging.version import Version + +from holoscan.cli.common.artifact_sources import ArtifactSources +from holoscan.cli.common.constants import SDK +from holoscan.cli.common.enum_types import ApplicationType, PlatformConfiguration, SdkType +from holoscan.cli.common.enum_types import Platform as PlatformTypes +from holoscan.cli.common.exceptions import IncompatiblePlatformConfigurationError +from holoscan.cli.packager.platforms import Platform + + +class TestPlatforms: + @pytest.fixture(autouse=True) + def _setup(self) -> None: + self._artifact_source = ArtifactSources() + current_file_path = Path(__file__).parent.parent.resolve() / "common" + source_file_sample = current_file_path / "./package-source.json" + self._artifact_source.load(str(source_file_sample)) + + def test_invalid_platform_options(self, monkeypatch): + monkeypatch.setattr( + "holoscan.cli.packager.platforms.detect_sdk", lambda sdk: SdkType.Holoscan + ) + monkeypatch.setattr( + "holoscan.cli.packager.platforms.detect_sdk_version", + lambda sdk, artifact_sources, sdk_version: ("2.4.0", None), + ) + + application_verison = "1.0.0" + input_args = Namespace() + input_args.sdk = SdkType.Holoscan + input_args.sdk_version = Version("2.4.0") + input_args.platform = [PlatformTypes.IGXOrinDevIt, PlatformTypes.X64Workstation] + input_args.holoscan_sdk_file = Path("some-random-file") + + platform = Platform(self._artifact_source) + with tempfile.TemporaryDirectory( + prefix="holoscan_test", dir=tempfile.gettempdir() + ) as temp_dir, pytest.raises(IncompatiblePlatformConfigurationError): + platform.configure_platforms( + input_args, temp_dir, application_verison, ApplicationType.CppCMake + ) + + def test_invalid_platform_options_holoscan_sdk_type_with_monai_deploy_sdk_file( + self, monkeypatch + ): + monkeypatch.setattr( + 
"holoscan.cli.packager.platforms.detect_sdk", lambda sdk: SdkType.Holoscan + ) + monkeypatch.setattr( + "holoscan.cli.packager.platforms.detect_sdk_version", + lambda sdk, artifact_sources, sdk_version: ("2.4.0", None), + ) + + application_verison = "1.0.0" + input_args = Namespace() + input_args.sdk = SdkType.Holoscan + input_args.sdk_version = Version("2.4.0") + input_args.platform = [PlatformTypes.IGXOrinDevIt, PlatformTypes.X64Workstation] + input_args.holoscan_sdk_file = None + input_args.monai_deploy_sdk_file = Path("some-random-file") + + platform = Platform(self._artifact_source) + with tempfile.TemporaryDirectory( + prefix="holoscan_test", dir=tempfile.gettempdir() + ) as temp_dir, pytest.raises(IncompatiblePlatformConfigurationError): + platform.configure_platforms( + input_args, temp_dir, application_verison, ApplicationType.CppCMake + ) + + def test_single_platform_with_monai_deploy(self, monkeypatch): + holoscan_version = "2.4.0" + monai_deploy_version = "2.4.1" + sdk_type = SdkType.MonaiDeploy + monkeypatch.setattr("holoscan.cli.packager.platforms.detect_sdk", lambda sdk: sdk_type) + monkeypatch.setattr( + "holoscan.cli.packager.platforms.detect_sdk_version", + lambda sdk, artifact_sources, sdk_version: (holoscan_version, monai_deploy_version), + ) + + application_verison = "1.0.0" + input_args = Namespace() + input_args.sdk = SdkType.Holoscan + input_args.platform = [PlatformTypes.IGXOrinDevIt] + input_args.platform_config = PlatformConfiguration.dGPU + input_args.tag = "my-app" + input_args.sdk_version = None + input_args.holoscan_sdk_file = None + input_args.monai_deploy_sdk_file = None + input_args.base_image = None + input_args.build_image = None + + platform = Platform(self._artifact_source) + with tempfile.TemporaryDirectory( + prefix="holoscan_test", dir=tempfile.gettempdir() + ) as temp_dir: + (sdk, hsdk_version, md_version, platforms) = platform.configure_platforms( + input_args, temp_dir, application_verison, ApplicationType.PythonModule + 
) + + assert sdk == sdk_type + assert hsdk_version == holoscan_version + assert md_version == monai_deploy_version + assert len(platforms) == 1 + + platform_parameters = platforms[0] + + assert platform_parameters.platform == input_args.platform[0] + assert ( + platform_parameters.base_image + == self._artifact_source.base_image(holoscan_version)[ + input_args.platform_config.value + ] + ) + assert platform_parameters.build_image is None + assert platform_parameters.tag == "my-app-igx-orin-devkit-dgpu-linux-arm64:1.0.0" + assert platform_parameters.tag_prefix == "my-app" + assert platform_parameters.custom_base_image is False + assert platform_parameters.custom_holoscan_sdk is False + assert ( + platform_parameters.holoscan_sdk_file + == self._artifact_source.wheel_package_version(holoscan_version) + ) + assert platform_parameters.custom_monai_deploy_sdk is False + assert platform_parameters.monai_deploy_sdk_file is None + assert platform_parameters.version == application_verison + assert platform_parameters.health_probe is None + assert ( + platform_parameters.platform_arch == SDK.PLATFORM_MAPPINGS[input_args.platform[0]] + ) + assert ( + platform_parameters.docker_arch + == SDK.PLATFORM_MAPPINGS[input_args.platform[0]].value + ) + assert platform_parameters.platform == input_args.platform[0] + assert platform_parameters.same_arch_as_system == (platform_lib.machine() == "aarch64") + assert platform_parameters.cuda_deb_arch == "sbsa" + assert platform_parameters.holoscan_deb_arch == "arm64" + assert platform_parameters.target_arch == "aarch64" + + def test_single_platform_with_monai_deploy_using_custom_sdk(self, monkeypatch): + holoscan_version = "2.4.0" + monai_deploy_version = "2.4.1" + sdk_type = SdkType.MonaiDeploy + monkeypatch.setattr("holoscan.cli.packager.platforms.detect_sdk", lambda sdk: sdk_type) + monkeypatch.setattr( + "holoscan.cli.packager.platforms.detect_sdk_version", + lambda sdk, artifact_sources, sdk_version: (holoscan_version, 
monai_deploy_version), + ) + + application_verison = "1.0.0" + input_args = Namespace() + input_args.sdk = SdkType.Holoscan + input_args.platform = [PlatformTypes.IGXOrinDevIt] + input_args.platform_config = PlatformConfiguration.dGPU + input_args.tag = "my-app" + input_args.sdk_version = None + input_args.holoscan_sdk_file = None + input_args.monai_deploy_sdk_file = Path("my-monai-deploy-sdk.whl") + input_args.base_image = None + input_args.build_image = None + + platform = Platform(self._artifact_source) + with tempfile.TemporaryDirectory( + prefix="holoscan_test", dir=tempfile.gettempdir() + ) as temp_dir: + (sdk, hsdk_version, md_version, platforms) = platform.configure_platforms( + input_args, temp_dir, application_verison, ApplicationType.PythonModule + ) + + assert sdk == sdk_type + assert hsdk_version == holoscan_version + assert md_version == monai_deploy_version + assert len(platforms) == 1 + + platform_parameters = platforms[0] + + assert platform_parameters.platform == input_args.platform[0] + assert ( + platform_parameters.base_image + == self._artifact_source.base_image(holoscan_version)[ + input_args.platform_config.value + ] + ) + assert platform_parameters.build_image is None + assert platform_parameters.tag == "my-app-igx-orin-devkit-dgpu-linux-arm64:1.0.0" + assert platform_parameters.tag_prefix == "my-app" + assert platform_parameters.custom_base_image is False + assert platform_parameters.custom_holoscan_sdk is False + assert ( + platform_parameters.holoscan_sdk_file + == self._artifact_source.wheel_package_version(holoscan_version) + ) + assert platform_parameters.custom_monai_deploy_sdk is True + assert platform_parameters.monai_deploy_sdk_file == input_args.monai_deploy_sdk_file + assert platform_parameters.version == application_verison + assert platform_parameters.health_probe is None + assert ( + platform_parameters.platform_arch == SDK.PLATFORM_MAPPINGS[input_args.platform[0]] + ) + assert ( + platform_parameters.docker_arch + == 
SDK.PLATFORM_MAPPINGS[input_args.platform[0]].value + ) + assert platform_parameters.platform == input_args.platform[0] + assert platform_parameters.same_arch_as_system == (platform_lib.machine() == "aarch64") + assert platform_parameters.cuda_deb_arch == "sbsa" + assert platform_parameters.holoscan_deb_arch == "arm64" + assert platform_parameters.target_arch == "aarch64" + + def test_multiple_platforms(self, monkeypatch): + holoscan_version = "2.4.0" + monai_deploy_version = None + sdk_type = SdkType.Holoscan + monkeypatch.setattr("holoscan.cli.packager.platforms.detect_sdk", lambda sdk: sdk_type) + monkeypatch.setattr( + "holoscan.cli.packager.platforms.detect_sdk_version", + lambda sdk, artifact_sources, sdk_version: (holoscan_version, monai_deploy_version), + ) + + application_verison = "1.0.0" + input_args = Namespace() + input_args.sdk = SdkType.Holoscan + input_args.platform = [ + PlatformTypes.IGXOrinDevIt, + PlatformTypes.X64Workstation, + PlatformTypes.SBSA, + ] + input_args.platform_config = PlatformConfiguration.dGPU + input_args.tag = "my-app" + input_args.sdk_version = None + input_args.holoscan_sdk_file = None + input_args.monai_deploy_sdk_file = None + input_args.base_image = None + input_args.build_image = None + + platform = Platform(self._artifact_source) + with tempfile.TemporaryDirectory( + prefix="holoscan_test", dir=tempfile.gettempdir() + ) as temp_dir: + (sdk, hsdk_version, md_version, platforms) = platform.configure_platforms( + input_args, temp_dir, application_verison, ApplicationType.CppCMake + ) + + assert sdk == sdk_type + assert hsdk_version == holoscan_version + assert md_version == monai_deploy_version + assert len(platforms) == len(input_args.platform) + + platform_parameters = platforms[0] + + assert platform_parameters.platform == input_args.platform[0] + assert ( + platform_parameters.base_image + == self._artifact_source.base_image(holoscan_version)[ + input_args.platform_config.value + ] + ) + assert ( + 
platform_parameters.build_image + == "nvcr.io/nvidia/clara-holoscan/holoscan:v2.3.0-dgpu" + ) + assert platform_parameters.tag == "my-app-igx-orin-devkit-dgpu-linux-arm64:1.0.0" + assert platform_parameters.tag_prefix == "my-app" + assert platform_parameters.custom_base_image is False + assert platform_parameters.custom_holoscan_sdk is False + assert ( + platform_parameters.holoscan_sdk_file + == self._artifact_source.debian_package_version(holoscan_version) + ) + assert platform_parameters.custom_monai_deploy_sdk is None + assert platform_parameters.monai_deploy_sdk_file is None + assert platform_parameters.version == application_verison + assert ( + platform_parameters.health_probe + == self._artifact_source.health_probe(holoscan_version)[ + SDK.PLATFORM_MAPPINGS[input_args.platform[0]].value + ] + ) + assert ( + platform_parameters.platform_arch == SDK.PLATFORM_MAPPINGS[input_args.platform[0]] + ) + assert ( + platform_parameters.docker_arch + == SDK.PLATFORM_MAPPINGS[input_args.platform[0]].value + ) + assert platform_parameters.platform == input_args.platform[0] + assert platform_parameters.same_arch_as_system == (platform_lib.machine() == "aarch64") + assert platform_parameters.cuda_deb_arch == "sbsa" + assert platform_parameters.holoscan_deb_arch == "arm64" + assert platform_parameters.target_arch == "aarch64" + + def test_platform_with_custom_base_image_and_build_image(self, monkeypatch): + holoscan_version = "2.4.0" + monai_deploy_version = None + sdk_type = SdkType.Holoscan + monkeypatch.setattr("holoscan.cli.packager.platforms.detect_sdk", lambda sdk: sdk_type) + monkeypatch.setattr( + "holoscan.cli.packager.platforms.detect_sdk_version", + lambda sdk, artifact_sources, sdk_version: (holoscan_version, monai_deploy_version), + ) + monkeypatch.setattr( + "holoscan.cli.packager.platforms.image_exists", + lambda img: True, + ) + + application_verison = "1.0.0" + input_args = Namespace() + input_args.sdk = SdkType.Holoscan + input_args.platform = 
[PlatformTypes.IGXOrinDevIt] + input_args.platform_config = PlatformConfiguration.dGPU + input_args.tag = "my-app" + input_args.sdk_version = None + input_args.holoscan_sdk_file = None + input_args.monai_deploy_sdk_file = None + input_args.base_image = "my-base-image" + input_args.build_image = "my-build-image" + + platform = Platform(self._artifact_source) + with tempfile.TemporaryDirectory( + prefix="holoscan_test", dir=tempfile.gettempdir() + ) as temp_dir: + (sdk, hsdk_version, md_version, platforms) = platform.configure_platforms( + input_args, temp_dir, application_verison, ApplicationType.CppCMake + ) + + assert sdk == sdk_type + assert hsdk_version == holoscan_version + assert md_version == monai_deploy_version + assert len(platforms) == 1 + + platform_parameters = platforms[0] + + assert platform_parameters.platform == input_args.platform[0] + assert platform_parameters.base_image == input_args.base_image + assert platform_parameters.build_image == input_args.build_image + assert platform_parameters.tag == "my-app-igx-orin-devkit-dgpu-linux-arm64:1.0.0" + assert platform_parameters.tag_prefix == "my-app" + assert platform_parameters.custom_base_image is True + assert platform_parameters.custom_holoscan_sdk is False + assert ( + platform_parameters.holoscan_sdk_file + == self._artifact_source.debian_package_version(holoscan_version) + ) + assert platform_parameters.custom_monai_deploy_sdk is None + assert platform_parameters.monai_deploy_sdk_file is None + assert platform_parameters.version == application_verison + assert ( + platform_parameters.health_probe + == self._artifact_source.health_probe(holoscan_version)[ + SDK.PLATFORM_MAPPINGS[input_args.platform[0]].value + ] + ) + assert ( + platform_parameters.platform_arch == SDK.PLATFORM_MAPPINGS[input_args.platform[0]] + ) + assert ( + platform_parameters.docker_arch + == SDK.PLATFORM_MAPPINGS[input_args.platform[0]].value + ) + assert platform_parameters.platform == input_args.platform[0] + assert 
platform_parameters.same_arch_as_system == (platform_lib.machine() == "aarch64") + assert platform_parameters.cuda_deb_arch == "sbsa" + assert platform_parameters.holoscan_deb_arch == "arm64" + assert platform_parameters.target_arch == "aarch64" + + def test_platform_with_custom_sdk_file(self, monkeypatch): + holoscan_version = "2.4.0" + monai_deploy_version = None + sdk_type = SdkType.Holoscan + monkeypatch.setattr("holoscan.cli.packager.platforms.detect_sdk", lambda sdk: sdk_type) + monkeypatch.setattr( + "holoscan.cli.packager.platforms.detect_sdk_version", + lambda sdk, artifact_sources, sdk_version: (holoscan_version, monai_deploy_version), + ) + monkeypatch.setattr( + "holoscan.cli.packager.platforms.image_exists", + lambda img: True, + ) + + application_verison = "1.0.0" + input_args = Namespace() + input_args.sdk = SdkType.Holoscan + input_args.platform = [PlatformTypes.IGXOrinDevIt] + input_args.platform_config = PlatformConfiguration.iGPU + input_args.tag = "my-app" + input_args.sdk_version = None + input_args.holoscan_sdk_file = Path("my-sdk-file.deb") + input_args.monai_deploy_sdk_file = None + input_args.base_image = "my-base-image" + input_args.build_image = "my-build-image" + + platform = Platform(self._artifact_source) + with tempfile.TemporaryDirectory( + prefix="holoscan_test", dir=tempfile.gettempdir() + ) as temp_dir: + (sdk, hsdk_version, md_version, platforms) = platform.configure_platforms( + input_args, temp_dir, application_verison, ApplicationType.CppCMake + ) + + assert sdk == sdk_type + assert hsdk_version == holoscan_version + assert md_version == monai_deploy_version + assert len(platforms) == 1 + + platform_parameters = platforms[0] + + assert platform_parameters.platform == input_args.platform[0] + assert platform_parameters.base_image == input_args.base_image + assert platform_parameters.build_image == input_args.build_image + assert platform_parameters.tag == "my-app-igx-orin-devkit-igpu-linux-arm64:1.0.0" + assert 
platform_parameters.tag_prefix == "my-app" + assert platform_parameters.custom_base_image is True + assert platform_parameters.custom_holoscan_sdk is True + assert platform_parameters.holoscan_sdk_file == input_args.holoscan_sdk_file + assert platform_parameters.custom_monai_deploy_sdk is None + assert platform_parameters.monai_deploy_sdk_file is None + assert platform_parameters.version == application_verison + assert ( + platform_parameters.health_probe + == self._artifact_source.health_probe(holoscan_version)[ + SDK.PLATFORM_MAPPINGS[input_args.platform[0]].value + ] + ) + assert ( + platform_parameters.platform_arch == SDK.PLATFORM_MAPPINGS[input_args.platform[0]] + ) + assert ( + platform_parameters.docker_arch + == SDK.PLATFORM_MAPPINGS[input_args.platform[0]].value + ) + assert platform_parameters.platform == input_args.platform[0] + assert platform_parameters.same_arch_as_system == (platform_lib.machine() == "aarch64") + assert platform_parameters.cuda_deb_arch == "sbsa" + assert platform_parameters.holoscan_deb_arch == "arm64" + assert platform_parameters.target_arch == "aarch64" diff --git a/python/tests/operator_parameters.yaml b/python/tests/operator_parameters.yaml index 533834f..cbbbd9b 100644 --- a/python/tests/operator_parameters.yaml +++ b/python/tests/operator_parameters.yaml @@ -182,6 +182,7 @@ v4l2_video_capture: width: 320 height: 240 pixel_format: "auto" + pass_through: false device: "/dev/video0" exposure_time: 500 gain: 100 diff --git a/python/tests/system/distributed/test_distributed_app_three_ucx_receivers.py b/python/tests/system/distributed/test_distributed_app_three_ucx_receivers.py index 87d72e4..a19a92b 100644 --- a/python/tests/system/distributed/test_distributed_app_three_ucx_receivers.py +++ b/python/tests/system/distributed/test_distributed_app_three_ucx_receivers.py @@ -181,8 +181,8 @@ def launch_app(use_new_receivers=True): # set the max duration to 10s to have enough time to run the test # (connection time takes ~5 seconds) 
("HOLOSCAN_MAX_DURATION_MS", "10000"), - # set the stop on deadlock timeout to 5s to have enough time to run the test - ("HOLOSCAN_STOP_ON_DEADLOCK_TIMEOUT", "5000"), + # set the stop on deadlock timeout to 10s to have enough time to run the test + ("HOLOSCAN_STOP_ON_DEADLOCK_TIMEOUT", "10000"), } with env_var_context(env_var_settings): @@ -205,6 +205,10 @@ def test_distributed_app_three_ucx_receivers(use_new_receivers, capfd): # assert that no errors were logged captured = capfd.readouterr() + + print("Captured stdout:", captured.out) + print("Captured stderr:", captured.err) + # avoid catching the expected error message # : "error handling callback was invoked with status -25 (Connection reset by remote peer)" captured_error = captured.err.replace("error handling callback", "ucx handling callback") diff --git a/python/tests/system/distributed/test_ucx_message_serialization.py b/python/tests/system/distributed/test_ucx_message_serialization.py index 69c9bc9..a562159 100644 --- a/python/tests/system/distributed/test_ucx_message_serialization.py +++ b/python/tests/system/distributed/test_ucx_message_serialization.py @@ -29,6 +29,36 @@ import cupy as cp import numpy as np +# test HolovizOp InputSpec +test_input_specs = [] +test_text_spec = HolovizOp.InputSpec("dynamic_text", "text") +test_text_spec.text = ["Text1"] +test_text_spec.color = [1.0, 0.0, 0.0, 1.0] +test_input_specs.append(test_text_spec) +test_input_specs.append(HolovizOp.InputSpec("triangles", HolovizOp.InputType.TRIANGLES)) +# create a full spec with all fields different from default +test_full_spec = HolovizOp.InputSpec("full", HolovizOp.InputType.COLOR) +test_full_spec.color = [0.5, 0.1, 0.2, 0.8] +test_full_spec.opacity = 0.124 +test_full_spec.priority = 12 +test_full_spec.image_format = HolovizOp.ImageFormat.R32G32B32A32_SFLOAT +test_full_spec.line_width = 12.0 +test_full_spec.point_size = 24.0 +test_full_spec.text = ["abc"] +test_full_spec.yuv_model_conversion = 
HolovizOp.YuvModelConversion.YUV_2020 +test_full_spec.yuv_range = HolovizOp.YuvRange.ITU_NARROW +test_full_spec.x_chroma_location = HolovizOp.ChromaLocation.MIDPOINT +test_full_spec.y_chroma_location = HolovizOp.ChromaLocation.MIDPOINT +test_full_spec.depth_map_render_mode = HolovizOp.DepthMapRenderMode.LINES +test_view = HolovizOp.InputSpec.View() +test_view.offset_x = 0.2 +test_view.offset_y = 1.3 +test_view.width = 4.0 +test_view.height = 2.8 +test_view.matrix = np.arange(16.0, dtype=float) +test_full_spec.views = [test_view] +test_input_specs.append(test_full_spec) + class PingMessageTxOp(Operator): """Simple transmitter operator. @@ -108,10 +138,30 @@ def _check_value(value, expected_value): assert z.shape == (16, 8, 4) elif isinstance(expected_value, list) and isinstance(expected_value[0], HolovizOp.InputSpec): assert isinstance(value, list) - assert len(value) == 2 + assert len(value) == 3 assert all(isinstance(v, HolovizOp.InputSpec) for v in value) - assert value[0].type == HolovizOp.InputType.TEXT + assert value[0].type == test_text_spec.type + assert value[0].color == test_text_spec.color assert value[1].type == HolovizOp.InputType.TRIANGLES + assert value[2].type == test_full_spec.type + assert value[2].color == test_full_spec.color + assert value[2].opacity == test_full_spec.opacity + assert value[2].priority == test_full_spec.priority + assert value[2].image_format == test_full_spec.image_format + assert value[2].line_width == test_full_spec.line_width + assert value[2].point_size == test_full_spec.point_size + assert value[2].text == test_full_spec.text + assert value[2].yuv_model_conversion == test_full_spec.yuv_model_conversion + assert value[2].yuv_range == test_full_spec.yuv_range + assert value[2].x_chroma_location == test_full_spec.x_chroma_location + assert value[2].y_chroma_location == test_full_spec.y_chroma_location + assert value[2].depth_map_render_mode == test_full_spec.depth_map_render_mode + assert all(isinstance(v, 
HolovizOp.InputSpec.View) for v in value[2].views) + assert value[2].views[0].offset_x == test_view.offset_x + assert value[2].views[0].offset_y == test_view.offset_y + assert value[2].views[0].width == test_view.width + assert value[2].views[0].height == test_view.height + assert value[2].views[0].matrix == test_view.matrix class PingMessageRxOp(Operator): @@ -194,13 +244,7 @@ def test_ucx_object_serialization_app(ping_config_file, value, capfd): elif value in ["cupy", "cupy-complex", "cupy-tensormap"]: pytest.importorskip("cupy") elif value == "input_specs": - specs = [] - text_spec = HolovizOp.InputSpec("dynamic_text", "text") - text_spec.text = ["Text1"] - text_spec.color = [1.0, 0.0, 0.0, 1.0] - specs.append(text_spec) - specs.append(HolovizOp.InputSpec("triangles", HolovizOp.InputType.TRIANGLES)) - value = specs + value = test_input_specs app = MultiFragmentPyObjectPingApp(value=value) app.run() @@ -285,13 +329,7 @@ def test_ucx_object_receivers_serialization_app(ping_config_file, value, capfd): elif value == "cupy": pytest.importorskip("cupy") elif value == "input_specs": - specs = [] - text_spec = HolovizOp.InputSpec("dynamic_text", "text") - text_spec.text = ["Text1"] - text_spec.color = [1.0, 0.0, 0.0, 1.0] - specs.append(text_spec) - specs.append(HolovizOp.InputSpec("triangles", HolovizOp.InputType.TRIANGLES)) - value = specs + value = test_input_specs app = MultiFragmentPyObjectReceiversPingApp(value=value) app.run() diff --git a/python/tests/system/test_application_minimal.py b/python/tests/system/test_application_minimal.py index ffb6dfa..7af40db 100644 --- a/python/tests/system/test_application_minimal.py +++ b/python/tests/system/test_application_minimal.py @@ -32,6 +32,7 @@ def __init__(self, *args, **kwargs): def initialize(self): print("** initialize method called **") + print(f"initialize(): param_value = {self.param_value}") def start(self): print("** start method called **") @@ -71,6 +72,9 @@ def test_minimal_app(ping_config_file, 
SchedulerClass, capfd): # noqa: N803 + assert captured.out.count("** start method called **") == 1 + assert captured.out.count("** stop method called **") == 1 + + # verify that parameter value is set + assert "initialize(): param_value = 500" in captured.out + + @pytest.mark.parametrize( + "SchedulerClass", [EventBasedScheduler, GreedyScheduler, MultiThreadScheduler] diff --git a/python/tests/system/test_application_with_repeated_emit_on_same_port.py b/python/tests/system/test_application_with_repeated_emit_on_same_port.py index cae7ac8..0ed658a 100644 --- a/python/tests/system/test_application_with_repeated_emit_on_same_port.py +++ b/python/tests/system/test_application_with_repeated_emit_on_same_port.py @@ -14,11 +14,13 @@ See the License for the specific language governing permissions and limitations under the License. """ # noqa: E501 +import os import pytest +from env_wrapper import env_var_context from holoscan.conditions import CountCondition -from holoscan.core import Application, ConditionType, IOSpec, Operator, OperatorSpec +from holoscan.core import Application, ConditionType, IOSpec, Operator, OperatorSpec, Tracker class PingMultiQueueTxOp(Operator): @@ -42,10 +44,11 @@ def __init__( super().__init__(fragment, *args, **kwargs) def setup(self, spec: OperatorSpec): + policy_kwarg = dict() if self.queue_policy is None else {"policy": self.queue_policy} spec.output("out").connector( IOSpec.ConnectorType.DOUBLE_BUFFER, capacity=self.queue_capacity, - policy=self.queue_policy, + **policy_kwarg, ).condition( ConditionType.DOWNSTREAM_MESSAGE_AFFORDABLE, min_size=self.queue_capacity, @@ -89,10 +92,11 @@ def __init__( super().__init__(fragment, *args, **kwargs) def setup(self, spec: OperatorSpec): + policy_kwarg = dict() if self.queue_policy is None else {"policy": self.queue_policy} spec.input("in").connector( IOSpec.ConnectorType.DOUBLE_BUFFER, capacity=self.queue_capacity, - policy=self.queue_policy, + **policy_kwarg, ).condition( ConditionType.MESSAGE_AVAILABLE, 
min_size=self.queue_capacity, @@ -102,8 +106,20 @@ def setup(self, spec: OperatorSpec): def compute(self, op_input, op_output, context): offset = self.count * self.queue_capacity expected_values = tuple(offset + i for i in range(1, self.queue_capacity + 1)) + queue_policy = self.queue_policy + if queue_policy is None: + # Need to know what the environment variable was set to in order to validate the + # expected result. + queue_policy = os.environ.get("HOLOSCAN_QUEUE_POLICY", "fail").lower() + if queue_policy == "pop": + queue_policy = 0 + elif queue_policy == "reject": + queue_policy = 1 + else: + queue_policy = 2 + if self.extra_emit and self.queue_capacity > 1: # noqa: SIM102 - if self.queue_policy == 0: + if queue_policy == 0: # Policy 0 is 'pop', so the first value would have been popped from the queue. # For `PingMultiQueueTxOp`, the last value is repeated. expected_values = expected_values[1:] + (expected_values[-1],) @@ -202,7 +218,7 @@ def test_ping_app_with_larger_queue(queue_capacity, capfd): assert f"PingMultiQueueRxOp has been called {count} times." in captured.out -@pytest.mark.parametrize("queue_policy", [0, 1, 2]) +@pytest.mark.parametrize("queue_policy", [0, 1, 2, None]) @pytest.mark.parametrize("queue_capacity", [1, 2, 5]) def test_ping_app_with_larger_queue_and_extra_emit(queue_capacity, queue_policy, capfd): """Test different queue policies when there is an extra emit call on the Transmitter""" @@ -226,7 +242,7 @@ def test_ping_app_with_larger_queue_and_extra_emit(queue_capacity, queue_policy, assert "error" not in captured.err assert "GXF_EXCEEDING_PREALLOCATED_SIZE" not in captured.err except RuntimeError as err: - if queue_policy == 2: + if queue_policy is None or queue_policy == 2: # policy 2 = fault, so app will terminate on the first compute call to # PingMultiQueueTxOp when `extra_emit=True` as above. 
captured = capfd.readouterr() @@ -236,3 +252,48 @@ def test_ping_app_with_larger_queue_and_extra_emit(queue_capacity, queue_policy, assert "GXF_EXCEEDING_PREALLOCATED_SIZE" in captured.err else: raise RuntimeError(f"unexpected Runtime error: {err}") from err + + +@pytest.mark.parametrize("policy_name", ["Pop", "Reject", "Fail"]) +@pytest.mark.parametrize("flow_tracking_enabled", [False, True]) +def test_ping_app_with_queue_policy_from_environment(policy_name, flow_tracking_enabled, capfd): + """Test different queue policies when there is an extra emit call on the Transmitter""" + count = 3 + app = MyCustomQueuePingApp( + count=count, + queue_capacity=2, + queue_policy=None, # None to use HOLOSCAN_QUEUE_POLICY environment variable + extra_emit=True, + ) + env_var_settings = { + ("HOLOSCAN_QUEUE_POLICY", policy_name), + } + with env_var_context(env_var_settings): + try: + if flow_tracking_enabled: + with Tracker(app) as tracker: + app.run() + tracker.print() + else: + app.run() + + captured = capfd.readouterr() + + # Assert that `PingMultiQueueRxOp.compute` was called the expected number of times + # (The compute method itself confirms the received values) + assert f"PingMultiQueueRxOp has been called {count} times." in captured.out + + # queue size error was not logged + assert "error" not in captured.err + assert "GXF_EXCEEDING_PREALLOCATED_SIZE" not in captured.err + except RuntimeError as err: + if policy_name.lower() == "fail": + # policy 2 = fault, so app will terminate on the first compute call to + # PingMultiQueueTxOp when `extra_emit=True` as above. 
+ captured = capfd.readouterr() + + # verify that an error about exceeding the queue size was logged + assert "error" in captured.err + assert "GXF_EXCEEDING_PREALLOCATED_SIZE" in captured.err + else: + raise RuntimeError(f"unexpected Runtime error: {err}") from err diff --git a/python/tests/system/test_operator_metadata_apps.py b/python/tests/system/test_operator_metadata_apps.py new file mode 100644 index 0000000..ac38920 --- /dev/null +++ b/python/tests/system/test_operator_metadata_apps.py @@ -0,0 +1,227 @@ +""" + SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" # noqa: E501 +import pytest + +from holoscan.conditions import CountCondition +from holoscan.core import Application, MetadataPolicy, Operator, OperatorSpec +from holoscan.schedulers import EventBasedScheduler, GreedyScheduler, MultiThreadScheduler + + +class PingMetadataTxOp(Operator): + def __init__(self, fragment, *args, **kwargs): + self.index = 0 + # Need to call the base class constructor last + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec: OperatorSpec): + spec.output("out1") + spec.output("out2") + + def compute(self, op_input, op_output, context): + self.index += 1 + meta = self.metadata + if self.is_metadata_enabled: + meta["channel_1_id"] = "odds" + op_output.emit(self.index, "out1") + + self.index += 1 + if self.is_metadata_enabled: + del meta["channel_1_id"] + meta["channel_2_id"] = "evens" + op_output.emit(self.index, "out2") + + +class PingMetadataMiddleOp(Operator): + def __init__(self, fragment, multiplier=2, *args, **kwargs): + self.count = 1 + self.multiplier = multiplier + + # Need to call the base class constructor last + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec: OperatorSpec): + spec.input("in1") + spec.input("in2") + spec.output("out1") + spec.output("out2") + + def compute(self, op_input, op_output, context): + value1 = op_input.receive("in1") + value2 = op_input.receive("in2") + self.count += 1 + + # add the multiplier parameter used in the metadata + if self.is_metadata_enabled: + self.metadata["multiplier"] = self.multiplier + + op_output.emit(value1 * self.multiplier, "out1") + op_output.emit(value2 * self.multiplier, "out2") + + +class PingMetadataRxOp(Operator): + def __init__(self, fragment, *args, **kwargs): + self.count = 1 + # Need to call the base class constructor last + super().__init__(fragment, *args, **kwargs) + + def setup(self, spec: OperatorSpec): + spec.param("receivers", kind="receivers") + + def compute(self, op_input, op_output, context): + values = 
op_input.receive("receivers") + print(f"rx: {len(values)=}") + assert values is not None + print(f"received message {self.count}") + self.count += 1 + if self.is_metadata_enabled: + print("metadata is enabled") + assert "multiplier" in self.metadata + assert "channel_1_id" in self.metadata + assert "channel_2_id" in self.metadata + else: + print("metadata is disabled") + assert "multiplier" not in self.metadata + assert "channel_1_id" not in self.metadata + assert "channel_2_id" not in self.metadata + + +class MyPingApp(Application): + def __init__(self, *args, count=10, **kwargs): + self.count = count + super().__init__(*args, **kwargs) + + def compose(self): + tx = PingMetadataTxOp( + self, + CountCondition(self, self.count), + name="tx", + ) + mx = PingMetadataMiddleOp(self, name="mx") + rx = PingMetadataRxOp(self, name="rx") + rx.metadata_policy = MetadataPolicy.UPDATE + self.add_flow(tx, mx, {("out1", "in1"), ("out2", "in2")}) + self.add_flow(mx, rx, {("out1", "receivers"), ("out2", "receivers")}) + + +class MyPingParallelApp(Application): + """ + App with one transmitter, tx, being broadcast to 4 parallel branches that have two multiplier + operators each. The outputs of all parallel branches connect to a common receiver, rx. 
+ + | -> mx11 -> mx21 -> | + | -> mx12 -> mx22 -> | + tx -> | -> mx13 -> mx23 -> | -> rx + | -> mx14 -> mx24 -> | + + """ + + def __init__(self, *args, count=10, rx_policy=MetadataPolicy.RAISE, **kwargs): + self.count = count + self.rx_policy = rx_policy + super().__init__(*args, **kwargs) + + def compose(self): + tx = PingMetadataTxOp( + self, + CountCondition(self, self.count), + name="tx", + ) + mx11 = PingMetadataMiddleOp(self, name="mx11") + mx11.metadata_policy = MetadataPolicy.UPDATE + mx12 = PingMetadataMiddleOp(self, name="mx12") + mx12.metadata_policy = MetadataPolicy.UPDATE + mx13 = PingMetadataMiddleOp(self, name="mx13") + mx13.metadata_policy = MetadataPolicy.UPDATE + mx14 = PingMetadataMiddleOp(self, name="mx14") + mx14.metadata_policy = MetadataPolicy.UPDATE + mx21 = PingMetadataMiddleOp(self, name="mx21") + mx21.metadata_policy = MetadataPolicy.UPDATE + mx22 = PingMetadataMiddleOp(self, name="mx22") + mx22.metadata_policy = MetadataPolicy.UPDATE + mx23 = PingMetadataMiddleOp(self, name="mx23") + mx23.metadata_policy = MetadataPolicy.UPDATE + mx24 = PingMetadataMiddleOp(self, name="mx24") + mx24.metadata_policy = MetadataPolicy.UPDATE + rx = PingMetadataRxOp(self, name="rx") + # leave at default policy for rx_policy=None + if self.rx_policy is None: + assert rx.metadata_policy == MetadataPolicy.RAISE + else: + rx.metadata_policy = self.rx_policy + + self.add_flow(tx, mx11, {("out1", "in1"), ("out2", "in2")}) + self.add_flow(tx, mx12, {("out1", "in1"), ("out2", "in2")}) + self.add_flow(tx, mx13, {("out1", "in1"), ("out2", "in2")}) + self.add_flow(tx, mx14, {("out1", "in1"), ("out2", "in2")}) + self.add_flow(mx11, mx21, {("out1", "in1"), ("out2", "in2")}) + self.add_flow(mx12, mx22, {("out1", "in1"), ("out2", "in2")}) + self.add_flow(mx13, mx23, {("out1", "in1"), ("out2", "in2")}) + self.add_flow(mx14, mx24, {("out1", "in1"), ("out2", "in2")}) + self.add_flow(mx21, rx, {("out1", "receivers"), ("out2", "receivers")}) + self.add_flow(mx22, rx, {("out1", 
"receivers"), ("out2", "receivers")}) + self.add_flow(mx23, rx, {("out1", "receivers"), ("out2", "receivers")}) + self.add_flow(mx24, rx, {("out1", "receivers"), ("out2", "receivers")}) + + +@pytest.mark.parametrize("is_metadata_enabled", [False, True]) +def test_my_ping_app(capfd, is_metadata_enabled): + count = 100 + app = MyPingApp(count=count) + app.is_metadata_enabled = is_metadata_enabled + app.run() + + # assert that the expected number of messages were received + captured = capfd.readouterr() + + assert "rx: len(values)=2" in captured.out + assert f"received message {count}" in captured.out + assert f"received message {count + 1}" not in captured.out + assert f"metadata is {'enabled' if is_metadata_enabled else 'disabled'}" in captured.out + + +@pytest.mark.parametrize( + "scheduler_class", [GreedyScheduler, MultiThreadScheduler, EventBasedScheduler] +) +@pytest.mark.parametrize("is_metadata_enabled", [False, True]) +@pytest.mark.parametrize( + "update_policy", [MetadataPolicy.UPDATE, MetadataPolicy.RAISE, MetadataPolicy.REJECT, None] +) +def test_my_ping_parallel_app(capfd, scheduler_class, is_metadata_enabled, update_policy): + count = 3 + app = MyPingParallelApp(count=count, rx_policy=update_policy) + app.is_metadata_enabled = is_metadata_enabled + + app.scheduler(scheduler_class(app)) + if is_metadata_enabled and (update_policy is None or update_policy == MetadataPolicy.RAISE): + with pytest.raises(RuntimeError): + app.run() + + # assert that the expected error message was logged + captured = capfd.readouterr() + assert "duplicate metadata keys" in captured.err + + return + else: + app.run() + + # assert that the expected number of messages were received + captured = capfd.readouterr() + + assert "rx: len(values)=8" in captured.out + assert f"received message {count}" in captured.out + assert f"received message {count + 1}" not in captured.out + assert f"metadata is {'enabled' if is_metadata_enabled else 'disabled'}" in captured.out diff --git 
a/python/tests/system/test_operator_tensor_validation.py b/python/tests/system/test_operator_tensor_validation.py index 32cd211..98251ec 100644 --- a/python/tests/system/test_operator_tensor_validation.py +++ b/python/tests/system/test_operator_tensor_validation.py @@ -42,7 +42,7 @@ def __init__( *args, width=800, height=640, - channels=3, + channels=1, on_host=True, fortran_ordered=False, dtype=np.uint8, @@ -152,7 +152,7 @@ def __init__( count=10, width=800, height=640, - channels=3, + channels=1, on_host=False, fortran_ordered=False, **kwargs, @@ -204,7 +204,7 @@ def compose(self): @pytest.mark.parametrize("fortran_ordered, on_host", [(False, False), (True, False), (False, True)]) -@pytest.mark.parametrize("channels", [3, None]) +@pytest.mark.parametrize("channels", [1, None]) def test_bayer_demosaic_memory_layout(fortran_ordered, on_host, channels, capfd): """Test HolovizOp with valid (row-major) and invalid (column-major) memory layouts.""" count = 3 @@ -218,32 +218,22 @@ def test_bayer_demosaic_memory_layout(fortran_ordered, on_host, channels, capfd) on_host=on_host, fortran_ordered=fortran_ordered, ) - if channels is None: + if fortran_ordered: with pytest.raises(RuntimeError): demosaic_app.run() captured = capfd.readouterr() # assert that app raised exception on the first frame assert captured.out.count("Emitting frame") == 1 - assert "Input tensor has 2 dimensions. 
Expected a tensor with 3 dimensions" in captured.err + assert "Tensor must have a row-major memory layout" in captured.err else: - if fortran_ordered: - with pytest.raises(RuntimeError): - demosaic_app.run() - captured = capfd.readouterr() - - # assert that app raised exception on the first frame - assert captured.out.count("Emitting frame") == 1 - assert "Tensor must have a row-major memory layout" in captured.err + demosaic_app.run() - else: - demosaic_app.run() - - captured = capfd.readouterr() + captured = capfd.readouterr() - # assert that replayer_app received all frames - assert captured.out.count("Emitting frame") == count + # assert that replayer_app received all frames + assert captured.out.count("Emitting frame") == count class SegmentationPostprocessorApp(Application): diff --git a/python/tests/unit/test_core_metadata.py b/python/tests/unit/test_core_metadata.py new file mode 100644 index 0000000..0770607 --- /dev/null +++ b/python/tests/unit/test_core_metadata.py @@ -0,0 +1,347 @@ +""" + SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + SPDX-License-Identifier: Apache-2.0 + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" # noqa: E501 + +from collections.abc import Sequence + +import numpy as np +import pytest + +from holoscan.core import MetadataDictionary, MetadataPolicy + + +@pytest.mark.parametrize("name", ["REJECT", "UPDATE", "RAISE"]) +def test_policy_type(name): + assert hasattr(MetadataPolicy, name) + + +class TestMetadataDictionary: + def test_init(self, capfd): + d = MetadataDictionary() + assert len(d) == 0 + assert d.size() == 0 + + def test_invalid_init(self): + # cannot initialize with a Python dict + with pytest.raises(TypeError): + MetadataDictionary({"a": 1, "b": 2}) + + def test_container_dunder_methods(self): + d = MetadataDictionary() + + # test __setitem__ and __getitem__ + # Note: __setitem__ always stores values as Python objects. + d["key1"] = 5 + d["key2"] = "value2" + pydict = {"a": 1, "b": 2} + d["key3"] = pydict + + # test __getitem__ + assert isinstance(d["key1"], int) + assert d["key1"] == 5 + assert isinstance(d["key2"], str) + assert d["key2"] == "value2" + assert isinstance(d["key3"], dict) + assert d["key3"] == pydict + with pytest.raises(KeyError): + d["key4"] + + # updating the pydict dictionary also affects the stored value in MetadataDictionary + pydict["c"] = [1, 2, 3] + assert len(d["key3"]) == 3 + assert d["key3"] == pydict + + # test __len__ + assert len(d) == 3 == d.size() + + # test __contains__ + assert not d.has_key("key4") + assert "key4" not in d + assert "key1" in d + assert "key2" in d + assert "key3" in d + + d.erase("key2") + assert len(d) == 2 + assert "key2" not in d + + # test __delitem__ + del d["key1"] + assert len(d) == 1 + assert "key1" not in d + + def test_pop(self): + d = MetadataDictionary() + + # test __setitem__ and __getitem__ + # Note: __setitem__ always stores values as Python objects. 
+ val2 = "value2" + d["key1"] = 5 + d["key2"] = val2 + popped = d.pop("key2") + assert popped == val2 + assert "key2" not in d + assert len(d) == 1 + + with pytest.raises(KeyError): + d.pop("key2") + + popped = d.pop("key2", 10) + assert len(d) == 1 + assert popped == 10 + + popped = d.pop("key2", None) + assert len(d) == 1 + assert popped is None + + def test_clear(self): + d = MetadataDictionary() + + d["key1"] = 1 + d["key2"] = 2 + d["key3"] = 3 + assert len(d) == 3 + d.clear() + assert len(d) == 0 + + def test_keys(self): + d = MetadataDictionary() + + d["k2"] = 2 + d["k1"] = 1 + d["k3"] = 3 + # underlying storage is unordered_map, so sort keys to compare + assert sorted(d.keys()) == sorted(["k2", "k1", "k3"]) + + @pytest.mark.parametrize( + "method, policy", + [ + ("insert", None), + ("merge", None), + ("update", MetadataPolicy.REJECT), + ("update", MetadataPolicy.UPDATE), + ("update", None), # policy is RAISE if not otherwise specified + ("update", MetadataPolicy.RAISE), + ], + ) + def test_insert_merge_update_swap(self, method, policy): + d = MetadataDictionary() + d["key1"] = 1 + d["key2"] = 2 + d["key3"] = 3 + assert len(d) == 3 + + d2 = MetadataDictionary() + d2["key1"] = 10 + d2["key4"] = 40 + d2["key5"] = 50 + assert len(d2) == 3 + + if policy is not None: + d.policy = policy + d2.policy = policy + + if method == "insert": + d.insert(d2) + assert len(d) == 5 + assert len(d2) == 3 + assert d["key1"] == 1 # kept the original value + assert "key5" in d + elif method == "merge": + d.merge(d2) + assert len(d) == 5 + assert len(d2) == 1 # merged items are no longer in + assert d["key1"] == 1 # kept the original value + assert "key5" in d + elif method == "update": + if policy is None or policy == MetadataPolicy.RAISE: + with pytest.raises(RuntimeError): + d.update(d2) + else: + d.update(d2) + assert len(d) == 5 + assert len(d2) == 3 + if policy == MetadataPolicy.REJECT: + assert d["key1"] == 1 # kept the original value + else: + assert d["key1"] == 10 # 
updated to the new value + assert "key5" in d + + # swap contents + size1 = len(d) + size2 = len(d2) + d2.swap(d) + assert len(d2) == size1 + assert len(d) == size2 + + def test_item_type_names(self): + d = MetadataDictionary() + d.set("key1", 1, cast_to_cpp=True) + d.set("key2", 1.5, cast_to_cpp=True) + d["key3"] = dict(a=1, b=2) + type_dict = d.type_dict() + assert len(type_dict) == 3 + assert isinstance(type_dict, dict) + assert all(isinstance(k, str) and isinstance(v, str) for k, v in type_dict.items()) + + def test_items(self): + d = MetadataDictionary() + d["key1"] = 1 + d["key2"] = [2, 3] + d["key3"] = dict(a=1, b=2) + assert len(d) == 3 + + count = 0 + for k, v in d.items(): + count += 1 + if k == "key1": + assert v == 1 + elif k == "key2": + assert v == [2, 3] + elif k == "key3": + assert v == dict(a=1, b=2) + assert count == 3 + assert len(d) == 3 + + @pytest.mark.parametrize( + "dtype", + [ + np.dtype("bool"), + np.dtype("int8"), + np.dtype("int16"), + np.dtype("int32"), + np.dtype("int64"), + np.dtype("uint8"), + np.dtype("uint16"), + np.dtype("uint32"), + np.dtype("uint64"), + np.dtype("float32"), + np.dtype("float64"), + np.dtype("complex64"), + np.dtype("complex128"), + ], + ) + @pytest.mark.parametrize("cast_to_cpp", [True, False]) + def test_dtype(self, dtype, cast_to_cpp): + d = MetadataDictionary() + value = 5 + d.set("x", value, dtype=dtype, cast_to_cpp=cast_to_cpp) + + if cast_to_cpp: + x = d["x"] + dtype = np.dtype(dtype) + if dtype.kind == "b": + assert x == bool(value) + else: + assert x == value + + if dtype.kind == "b": + assert isinstance(x, bool) + elif dtype.kind in "iu": + assert isinstance(x, int) + elif dtype.kind in "efg": + assert isinstance(x, float) + elif dtype.kind in "FG": + assert isinstance(x, complex) + else: + # with cast_to_cpp = False, dtype has no effect. The Python int object is stored. 
+ x = d["x"] + assert isinstance(x, int) + assert x == value + + @pytest.mark.parametrize("value", [(1, 2, 3), [1, 2, 3], {1, 2, 3}, range(1, 4)]) + @pytest.mark.parametrize("cast_to_cpp", [True, False]) + def test_iterable(self, value, cast_to_cpp): + d = MetadataDictionary() + d.set("x", value, cast_to_cpp=cast_to_cpp) + + if cast_to_cpp: + x = d["x"] + assert isinstance(x, list) + assert all(isinstance(x[i], int) for i in range(len(value))) + assert x == [1, 2, 3] + + else: + # with cast_to_cpp = False, dtype has no effect. The Python object is stored. + x = d["x"] + assert isinstance(x, type(value)) + assert x == value + assert x is value + + @pytest.mark.parametrize( + "value", + [ + np.array([1, 2, 3], dtype=np.int32), + np.array([1, 2, 3], dtype=np.float32), + np.array([1, 2, 3], dtype=np.complex64), + ], + ) + @pytest.mark.parametrize("cast_to_cpp", [True, False]) + def test_numpy_array_values(self, value, cast_to_cpp): + d = MetadataDictionary() + d.set("x", value, cast_to_cpp=cast_to_cpp) + + if cast_to_cpp: + x = d["x"] + assert isinstance(x, list) + if value.dtype.kind in "iu": + assert all(isinstance(x[i], int) for i in range(len(value))) + elif value.dtype.kind in "fg": + assert all(isinstance(x[i], float) for i in range(len(value))) + elif value.dtype.kind in "FG": + assert all(isinstance(x[i], complex) for i in range(len(value))) + assert x == [1, 2, 3] + else: + # with cast_to_cpp = False, dtype has no effect. The Python object is stored. 
+ x = d["x"] + assert isinstance(x, type(value)) + np.testing.assert_array_equal(x, value) + assert x is value + + @pytest.mark.parametrize( + "value", + [ + "abc", + ("abc", "def", "ghi"), + (("a", "b", "c"), ["d", "e"], {"f", "g", "h", "i"}), + ], + ) + @pytest.mark.parametrize("cast_to_cpp", [True, False]) + def test_string_vector(self, value, cast_to_cpp): + d = MetadataDictionary() + d.set("x", value, cast_to_cpp=cast_to_cpp) + + if cast_to_cpp: + x = d["x"] + if isinstance(value, str): + assert isinstance(x, str) + assert x == value + elif isinstance(value, Sequence) and isinstance(value[0], str): + # stored as vector in C++ -> becomes list[str] in Python + assert isinstance(x, list) + assert all(isinstance(x[i], str) for i in range(len(value))) + assert x == list(value) + elif isinstance(value, Sequence) and isinstance(value[0], Sequence): + # stored as vector> in C++ -> becomes list[list[str]] in Python + assert isinstance(x, list) + assert all(isinstance(x[i], list) for i in range(len(value))) + assert x == list(list(v) for v in value) + else: + # with cast_to_cpp = False, dtype has no effect. The Python object is stored. 
+ x = d["x"] + assert isinstance(x, type(value)) + assert x == value + assert x is value diff --git a/python/tests/unit/test_operators_native.py b/python/tests/unit/test_operators_native.py index acb48ec..a2e2692 100644 --- a/python/tests/unit/test_operators_native.py +++ b/python/tests/unit/test_operators_native.py @@ -22,7 +22,16 @@ import holoscan.operators from holoscan.conditions import CountCondition -from holoscan.core import Application, Arg, Operator, OperatorSpec, Tensor, _Operator +from holoscan.core import ( + Application, + Arg, + MetadataDictionary, + MetadataPolicy, + Operator, + OperatorSpec, + Tensor, + _Operator, +) from holoscan.core._core import OperatorSpec as OperatorSpecBase from holoscan.gxf import Entity @@ -30,20 +39,27 @@ with suppress(ImportError): from holoscan.operators.aja_source import AJASourceOp, NTV2Channel -from holoscan.operators.bayer_demosaic import BayerDemosaicOp -from holoscan.operators.format_converter import FormatConverterOp -from holoscan.operators.holoviz import ( +from holoscan.operators import ( + BayerDemosaicOp, + FormatConverterOp, HolovizOp, + InferenceOp, + InferenceProcessorOp, + PingTensorRxOp, + PingTensorTxOp, + SegmentationPostprocessorOp, + V4L2VideoCaptureOp, + VideoStreamRecorderOp, + VideoStreamReplayerOp, +) +from holoscan.operators.holoviz import ( + _holoviz_str_to_chroma_location, _holoviz_str_to_depth_map_render_mode, _holoviz_str_to_image_format, _holoviz_str_to_input_type, + _holoviz_str_to_yuv_model_conversion, + _holoviz_str_to_yuv_range, ) -from holoscan.operators.inference import InferenceOp -from holoscan.operators.inference_processor import InferenceProcessorOp -from holoscan.operators.segmentation_postprocessor import SegmentationPostprocessorOp -from holoscan.operators.v4l2_video_capture import V4L2VideoCaptureOp -from holoscan.operators.video_stream_recorder import VideoStreamRecorderOp -from holoscan.operators.video_stream_replayer import VideoStreamReplayerOp from holoscan.resources 
import ( BlockMemoryPool, CudaStreamPool, @@ -79,12 +95,27 @@ def test_invalid_init2(self, fragment): with pytest.raises(RuntimeError): Operator(fragment, fragment) - def test_basic_init(self, fragment, capfd): + def test_basic_init(self, fragment): op = Operator(fragment) assert op.name.startswith("unnamed_operator") assert op.fragment is fragment assert op.operator_type == Operator.OperatorType.NATIVE - capfd.readouterr() + + def test_metadata(self, fragment): + op = Operator(fragment) + assert op.metadata_policy == MetadataPolicy.RAISE + assert isinstance(op.metadata, MetadataDictionary) + meta = op.metadata + assert len(meta) == 0 + meta["name"] = "abcd" + op.metadata["age"] = 50 + assert len(op.metadata) == 2 + assert "name" in op.metadata + assert "age" in op.metadata + + # metadata transmission is disabled by default + assert not fragment.is_metadata_enabled + assert not op.is_metadata_enabled def test_basic_kwarg_init(self, fragment, capfd): op = Operator(fragment=fragment) @@ -597,12 +628,58 @@ def test_holoviz_input_types(type_str): "b8g8r8a8_srgb", "a8b8g8r8_unorm_pack32", "a8b8g8r8_srgb_pack32", + "y8u8y8v8_422_unorm", + "u8y8v8y8_422_unorm", + "y8_u8v8_2plane_420_unorm", + "y8_u8v8_2plane_422_unorm", + "y8_u8_v8_3plane_420_unorm", + "y8_u8_v8_3plane_422_unorm", + "y16_u16v16_2plane_420_unorm", + "y16_u16v16_2plane_422_unorm", + "y16_u16_v16_3plane_420_unorm", + "y16_u16_v16_3plane_422_unorm", ], ) def test_holoviz_image_formats(image_format_str): assert isinstance(_holoviz_str_to_image_format[image_format_str], HolovizOp.ImageFormat) +@pytest.mark.parametrize( + "yuv_model_conversion", + [ + "yuv_601", + "yuv_709", + "yuv_2020", + ], +) +def test_holoviz_yuv_model_conversions(yuv_model_conversion): + assert isinstance( + _holoviz_str_to_yuv_model_conversion[yuv_model_conversion], HolovizOp.YuvModelConversion + ) + + +@pytest.mark.parametrize( + "yuv_range", + [ + "itu_full", + "itu_narrow", + ], +) +def test_holoviz_yuv_ranges(yuv_range): + assert 
isinstance(_holoviz_str_to_yuv_range[yuv_range], HolovizOp.YuvRange) + + +@pytest.mark.parametrize( + "chroma_location", + [ + "cosited_even", + "midpoint", + ], +) +def test_holoviz_chroma_locations(chroma_location): + assert isinstance(_holoviz_str_to_chroma_location[chroma_location], HolovizOp.ChromaLocation) + + @pytest.mark.parametrize( "depth_type_str", [ @@ -680,6 +757,50 @@ def test_text(self): spec.text = ["abc", "de", "fghij"] + def test_yuv_model_conversion(self): + spec = HolovizOp.InputSpec("tensor1", HolovizOp.InputType.COLOR) + + yuv_model_conversion = spec.yuv_model_conversion + assert yuv_model_conversion == HolovizOp.YuvModelConversion.YUV_601 + + spec.yuv_model_conversion = HolovizOp.YuvModelConversion.YUV_709 + + with pytest.raises(TypeError): + spec.yuv_model_conversion = 0 + + def test_yuv_range(self): + spec = HolovizOp.InputSpec("tensor1", HolovizOp.InputType.COLOR) + + yuv_range = spec.yuv_range + assert yuv_range == HolovizOp.YuvRange.ITU_FULL + + spec.yuv_range = HolovizOp.YuvRange.ITU_NARROW + + with pytest.raises(TypeError): + spec.yuv_range = 0 + + def test_x_chroma_location(self): + spec = HolovizOp.InputSpec("tensor1", HolovizOp.InputType.COLOR) + + x_chroma_location = spec.x_chroma_location + assert x_chroma_location == HolovizOp.ChromaLocation.COSITED_EVEN + + spec.x_chroma_location = HolovizOp.ChromaLocation.MIDPOINT + + with pytest.raises(TypeError): + spec.x_chroma_location = 0 + + def test_y_chroma_location(self): + spec = HolovizOp.InputSpec("tensor1", HolovizOp.InputType.COLOR) + + y_chroma_location = spec.y_chroma_location + assert y_chroma_location == HolovizOp.ChromaLocation.COSITED_EVEN + + spec.y_chroma_location = HolovizOp.ChromaLocation.MIDPOINT + + with pytest.raises(TypeError): + spec.y_chroma_location = 0 + def test_depth_map_render_mode(self): spec = HolovizOp.InputSpec("tensor1", HolovizOp.InputType.DEPTH_MAP) @@ -987,13 +1108,14 @@ def test_kwarg_based_initialization(self, app, capfd): width=320, height=240, 
pixel_format="auto", + pass_through=False, device="/dev/video0", allocator=UnboundedAllocator(app, name="pool"), exposure_time=500, gain=100, ) assert isinstance(op, _Operator) - assert len(op.args) == 8 + assert len(op.args) == 9 assert op.operator_type == Operator.OperatorType.NATIVE assert f"name: {name}" in repr(op) @@ -1010,7 +1132,7 @@ def test_default_initialization(self, app, capfd): allocator=UnboundedAllocator(app, name="pool"), ) assert isinstance(op, _Operator) - assert len(op.args) == 6 # No hardcoded defaults for exposure and gain + assert len(op.args) == 7 # No hardcoded defaults for exposure and gain assert op.operator_type == Operator.OperatorType.NATIVE assert f"name: {name}" in repr(op) @@ -1030,7 +1152,95 @@ def test_initialization_from_yaml(self, app, config_file, capfd): **app.kwargs("v4l2_video_capture"), ) assert isinstance(op, _Operator) - assert len(op.args) == 8 + assert len(op.args) == 9 + assert op.operator_type == Operator.OperatorType.NATIVE + assert f"name: {name}" in repr(op) + + # assert no warnings or errors logged + captured = capfd.readouterr() + assert "error" not in captured.err + assert "warning" not in captured.err + + +class TestPingTensorRxOp: + def test_default_initialization(self, app, capfd): + name = "tensor_rx" + op = PingTensorRxOp( + fragment=app, + name=name, + ) + assert isinstance(op, _Operator) + assert op.operator_type == Operator.OperatorType.NATIVE + assert f"name: {name}" in repr(op) + + # assert no warnings or errors logged + captured = capfd.readouterr() + assert "error" not in captured.err + + assert "warning" not in captured.err + + +class TestPingTensorTxOp: + def test_default_initialization(self, app, capfd): + name = "tensor_tx" + op = PingTensorTxOp( + fragment=app, + name=name, + ) + assert isinstance(op, _Operator) + assert op.operator_type == Operator.OperatorType.NATIVE + assert f"name: {name}" in repr(op) + + # assert no warnings or errors logged + captured = capfd.readouterr() + assert "error" 
not in captured.err + assert "warning" not in captured.err + + @pytest.mark.parametrize( + "dtype", + [ + np.dtype("float32"), + np.dtype("float64"), + np.dtype("complex64"), + np.dtype("complex128"), + np.dtype("int8"), + np.dtype("int16"), + np.dtype("int32"), + np.dtype("int64"), + np.dtype("uint8"), + np.dtype("uint16"), + np.dtype("uint32"), + np.dtype("uint64"), + np.dtype("bool"), # will use uint8_t + "float", + "double", + "complex", + "complex", + "int8_t", + "int16_t", + "int32_t", + "int64_t", + "uint8_t", + "uint16_t", + "uint32_t", + "uint64_t", + ], + ) + def test_kwarg_based_initialization(self, app, capfd, dtype): + name = "tensor_tx" + op = PingTensorTxOp( + fragment=app, + allocator=UnboundedAllocator(app, name="alloc"), + storage_type="system", + batch_size=10, + rows=16, + columns=24, + channels=3, + dtype=dtype, + tensor_name="image", + name=name, + ) + assert isinstance(op, _Operator) assert op.operator_type == Operator.OperatorType.NATIVE assert f"name: {name}" in repr(op) diff --git a/run b/run index 7921895..b8dfad2 100755 --- a/run +++ b/run @@ -36,6 +36,7 @@ init_globals() { # Options export DO_DRY_RUN="false" # print commands but do not execute them. 
Used by run_command export DO_STANDALONE="false" # do not run prerequisite functions + export NO_CACHE="" # by default, use cache # Define default img and dir names export SDK_BUILD_IMG="holoscan-sdk-build" @@ -453,6 +454,7 @@ build_image() { --build-arg GPU_TYPE=${GPU} \ --platform $(get_platform_str) \ --network=host \ + ${NO_CACHE} \ ${docker_tags} \ ${extra_args} \ ${TOP} @@ -714,6 +716,7 @@ build_run_image() { --platform $(get_platform_str) \ --network=host \ --target ${stage} \ + ${NO_CACHE} \ ${docker_tags} \ ${extra_args[@]} \ -f ${TOP}/runtime_docker/Dockerfile \ @@ -1134,7 +1137,7 @@ vscode() { local i local arg for i in "${!args[@]}"; do - if [ "$arg" = "--parallel" ]; then + if [ "${args[i]}" = "--parallel" ]; then build_njobs="${args[i+1]}" fi done @@ -1241,6 +1244,7 @@ build_docs_builder() { --network=host \ --target "docs-base" \ -t ${DOCS_BASE_IMG} \ + ${NO_CACHE} \ ${extra_args} \ ${TOP}/${DOCS_SRC_DIR} } @@ -1265,6 +1269,7 @@ run_html_builder() { --target "docs-html" \ --build-arg BASE_IMAGE=${current_build_img} \ -t ${DOCS_HTML_IMG} \ + ${NO_CACHE} \ ${TOP}/${DOCS_SRC_DIR} # Run HTML generation with sphynx. 
We need nvidia runtime and the SDK mounted and pythonpath @@ -1363,6 +1368,7 @@ build_pdf() { --network=host \ --target "docs-pdf" \ -t ${DOCS_PDF_IMG} \ + ${NO_CACHE} \ ${extra_args} \ ${TOP}/${DOCS_SRC_DIR} @@ -1652,6 +1658,7 @@ get_list_of_global_flags() { --help, -h : Print help messages for [command] --dryrun : Print commands to screen without running --standalone, -s : Do not run prerequisite steps for build-related commands + --no-cache, -n : Do not use cache when building docker images --arch, -a [amd64 | arm64] : Specify the platform (for cross-compilation) Default: current host architecture ($(get_host_arch)) Associated environment variable: HOLOSCAN_BUILD_ARCH @@ -1715,6 +1722,10 @@ main() { DO_STANDALONE="true" shift ;; + -n|--no-cache) + NO_CACHE="--no-cache" + shift + ;; -a|--arch) ARCH=$(get_gnu_arch_str $2) shift diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index b7c0300..a720b24 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -16,6 +16,7 @@ # Install useful scripts for developers install( FILES + aja_build.sh convert_gxf_entities_to_images.py convert_gxf_entities_to_video.py convert_video_to_gxf_entities.py diff --git a/scripts/README.md b/scripts/README.md index 6d8b087..0b12630 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -2,6 +2,8 @@ This folder includes the following scripts: +- [`aja_build.sh`](#aja_build) +- [`convert_gxf_entities_to_images.py`](#convert_gxf_entities_to_imagespy) - [`convert_gxf_entities_to_images.py`](#convert_gxf_entities_to_imagespy) - [`convert_gxf_entities_to_video.py`](#convert_gxf_entities_to_videopy) - [`convert_video_to_gxf_entities.py`](#convert_video_to_gxf_entitiespy) @@ -15,6 +17,10 @@ This folder includes the following scripts: ____ +## aja_build.sh + +Builds the AJA SDK with the proper flags and optionally loads the driver. 
+ ## convert_gxf_entities_to_images.py Takes in the encoded GXF tensor files generated by the `video_stream_recorder` and export raw frames in .png files. diff --git a/scripts/aja_build.sh b/scripts/aja_build.sh new file mode 100755 index 0000000..439614b --- /dev/null +++ b/scripts/aja_build.sh @@ -0,0 +1,153 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Read arguments. +POSITIONAL_ARGS=() + +while [[ $# -gt 0 ]]; do + case $1 in + --skip-sdk) + SKIP_SDK=1 + shift # past argument + ;; + --load-driver) + LOAD_DRIVER=1 + shift # past argument + ;; + -*|--*) + echo "Unknown option $1" + echo "Usage: $(basename $0) [--skip-sdk] [--load-driver]" + exit 1 + ;; + *) + POSITIONAL_ARGS+=("$1") # save positional arg + shift # past argument + ;; + esac +done + +set -- "${POSITIONAL_ARGS[@]}" # restore positional parameters +basedir=$(pwd) + +# Set the appropriate build flags. +echo "==========================================================" +echo -n " Building AJA driver " +if [ -z "$SKIP_SDK" ]; then + echo -n "and SDK " +fi +echo -n "with RDMA support for " +export AJA_RDMA=1 +if lsmod | grep -q nvgpu ; then + echo "iGPU" + export AJA_IGPU=1 +else + echo "dGPU" + unset AJA_IGPU +fi +echo "==========================================================" && echo + +# Ensure the open source dGPU driver is being used. 
+if [ -z "$AJA_IGPU" ]; then + LICENSE=$(modinfo -l nvidia) + if [ "$LICENSE" == "NVIDIA" ]; then + echo "ERROR: The open source NVIDIA drivers are required for RDMA support" + echo " but the closed source drivers are currently installed. Please" + echo " install the open source drivers then run this script again." + exit 1 + fi +fi + +# Ensure CMake is installed. +if [ -z "$SKIP_SDK" ]; then + if ! command -v cmake &> /dev/null; then + echo "ERROR: CMake is not installed. Install it with the following then" + echo " run this script again:" + echo " sudo apt install -y cmake" + exit 1 + fi +fi + +# Checkout the libajantv2 repo. +if [ ! -d libajantv2 ]; then + git clone https://github.com/nvidia-holoscan/libajantv2.git + if [ $? -ne 0 ]; then + echo "ERROR: Failed to checkout libajantv2 repo." + exit 1 + fi + cd libajantv2/ +else + cd libajantv2/ && git pull + if [ $? -ne 0 ]; then + echo "ERROR: Failed to checkout libajantv2 repo." + exit 1 + fi +fi + +# Build the driver. +make -j --directory driver/linux/ +if [ $? -ne 0 ]; then + echo "ERROR: Failed to build libajantv2 driver." + exit 1 +fi + +# Build the SDK. +if [ -z "$SKIP_SDK" ]; then + mkdir -p build && cd build + cmake .. -Wno-dev && make -j + if [ $? -ne 0 ]; then + echo "ERROR: Failed to build libajantv2 SDK." + exit 1 + fi + if ! [ -f tools/rdmawhacker/rdmawhacker ]; then + echo && echo "WARNING: rdmawhacker build was skipped. Is CUDA installed?" + fi +fi + +# Load the driver. +if [ -n "$LOAD_DRIVER" ]; then + echo && echo "==========================================================" + echo "Loading AJA driver..." + cd $basedir + sudo ./libajantv2/driver/bin/load_ajantv2 + if [ $? -ne 0 ]; then + echo "ERROR: Failed to load AJA driver." + exit 1 + fi + if [ -z "$SKIP_SDK" ]; then + echo && echo "Enumerating AJA Devices:" + ./libajantv2/build/demos/ntv2enumerateboards/ntv2enumerateboards + if [ $? -ne 0 ]; then + echo "ERROR: Failed to enumerate AJA devices." + exit 1 + fi + fi +fi + +# Finish up. 
+echo && echo "============================================================" +echo "SUCCESS!" +if [ -z "$LOAD_DRIVER" ]; then + echo "Load driver using 'sudo ./libajantv2/driver/bin/load_ajantv2'" + if [ -f ${basedir}/libajantv2/build/demos/ntv2enumerateboards/ntv2enumerateboards ]; then + echo "Use ntv2enumerateboards tool to list available AJA devices:" + echo " ./libajantv2/build/demos/ntv2enumerateboards/ntv2enumerateboards" + fi +fi +if [ -f ${basedir}/libajantv2/build/tools/rdmawhacker/rdmawhacker ]; then + echo "Use rdmawhacker tool to check RDMA is functional (CTRL-C to exit):" + echo " ./libajantv2/build/tools/rdmawhacker/rdmawhacker" +fi +exit 0 diff --git a/src/core/cli_parser.cpp b/src/core/cli_parser.cpp index 3e628a6..86bda65 100644 --- a/src/core/cli_parser.cpp +++ b/src/core/cli_parser.cpp @@ -67,6 +67,9 @@ void CLIParser::initialize(std::string app_description, std::string app_version) "Path to the configuration file. This will override the configuration file path " "configured in the application code (before run() is called)."); + // Allow extra arguments (ie, don't throw an error) + app_.allow_extras(true); + is_initialized_ = true; } has_error_ = false; @@ -107,13 +110,17 @@ std::vector& CLIParser::parse(std::vector& argv) { std::cout << v.what() << "\n"; std::exit(0); } catch (const CLI::ExtrasError& e) { - // Do nothing for the extra arguments error when too many positionals or options are found. - // This is intended to allow the application to handle the extra arguments. 
+ // It won't reach here since `allow_extras` is set to `true` + // (`app_.allow_extras(true);`) in the `initialize()` function } catch (const CLI::ParseError& e) { // Print the error message and set the error flag HOLOSCAN_LOG_ERROR("{}", e.what()); has_error_ = true; } + if (app_.get_allow_extras()) { + // Store the remaining arguments + argv = app_.remaining(); + } return argv; } diff --git a/src/core/executors/gxf/gxf_executor.cpp b/src/core/executors/gxf/gxf_executor.cpp index 9327446..ab26caf 100644 --- a/src/core/executors/gxf/gxf_executor.cpp +++ b/src/core/executors/gxf/gxf_executor.cpp @@ -81,7 +81,7 @@ namespace { std::pair get_capacity_and_policy( nvidia::gxf::Handle component) { uint64_t capacity = 1; - uint64_t policy = 2; + uint64_t policy = holoscan::gxf::get_default_queue_policy(); if (component.is_null()) { HOLOSCAN_LOG_ERROR("Null component handle"); return std::make_pair(capacity, policy); @@ -1124,7 +1124,7 @@ void GXFExecutor::connect_broadcast_to_previous_op( // Find prev_connector's capacity and policy. uint64_t prev_connector_capacity = 1; - uint64_t prev_connector_policy = 2; // fault + uint64_t prev_connector_policy = holoscan::gxf::get_default_queue_policy(); // Create a transmitter based on the prev_connector_type. switch (prev_connector_type) { @@ -1275,7 +1275,7 @@ void GXFExecutor::create_broadcast_components(holoscan::OperatorGraph::NodeType uint64_t curr_min_size = 1; uint64_t curr_connector_capacity = 1; - uint64_t curr_connector_policy = 2; // fault + uint64_t curr_connector_policy = holoscan::gxf::get_default_queue_policy(); // Create a corresponding condition of the op's output port and set it as the // receiver's condition for the broadcast entity. @@ -1657,6 +1657,14 @@ bool GXFExecutor::initialize_operator(Operator* op) { } else if (!own_gxf_context_) { // GXF context was created outside HOLOSCAN_LOG_DEBUG("Not an own GXF context. 
Op: {}", op->name()); } + + // Skip if the operator is already initialized + if (op->is_initialized_) { + HOLOSCAN_LOG_DEBUG("Operator '{}' is already initialized. Skipping initialization.", + op->name()); + return true; + } + HOLOSCAN_LOG_DEBUG("Initializing Operator '{}'", op->name()); if (!op->spec()) { @@ -1700,6 +1708,9 @@ bool GXFExecutor::initialize_operator(Operator* op) { // Set any parameters based on the specified arguments and parameter value defaults. op->set_parameters(); + + // Set the operator is initialized + op->is_initialized_ = true; return true; } diff --git a/src/core/gxf/gxf_io_context.cpp b/src/core/gxf/gxf_io_context.cpp index 1d5da7c..64721ee 100644 --- a/src/core/gxf/gxf_io_context.cpp +++ b/src/core/gxf/gxf_io_context.cpp @@ -41,6 +41,7 @@ nvidia::gxf::Receiver* get_gxf_receiver(const std::shared_ptr& input_spe "'IOSpec::kAnySize'. Please call 'op_input.receive>()' instead of " "'op_input.receive()'.", input_spec->name()); + throw std::invalid_argument("Invalid template type for the input port"); } else { HOLOSCAN_LOG_ERROR("Invalid connector type for the input spec '{}'", input_spec->name()); } @@ -87,24 +88,26 @@ std::any GXFInputContext::receive_impl(const char* name, bool no_error_message) auto it = inputs_.find(input_name); if (it == inputs_.end()) { - if (no_error_message) { return nullptr; } + if (no_error_message) { return kNoReceivedMessage; } // Show error message because the input name is not found. 
if (inputs_.size() == 1) { - HOLOSCAN_LOG_ERROR( + auto no_accessible_error_message = NoAccessibleMessageType(fmt::format( "The operator({}) has only one port with label '{}' but the non-existent port label " "'{}' was specified in the receive() method", op_->name(), inputs_.begin()->first, - name); - return kNoTypeCastableMessage; // to cause a bad_any_cast + name)); + + return no_accessible_error_message; } else { if (inputs_.empty()) { - HOLOSCAN_LOG_ERROR( - "The operator({}) does not have any input port but '{}' was specified in " - "receive() method", - op_->name(), - input_name); - return kNoTypeCastableMessage; // to cause a bad_any_cast + auto no_accessible_error_message = NoAccessibleMessageType( + fmt::format("The operator({}) does not have any input port but '{}' was specified in " + "receive() method", + op_->name(), + input_name)); + + return no_accessible_error_message; } auto msg_buf = fmt::memory_buffer(); @@ -116,20 +119,25 @@ std::any GXFInputContext::receive_impl(const char* name, bool no_error_message) fmt::format_to(std::back_inserter(msg_buf), ", {}", label); } } - HOLOSCAN_LOG_ERROR( - "The operator({}) does not have an input port with label '{}'. It should be " - "one of ({:.{}}) in receive() method", - op_->name(), - input_name, - msg_buf.data(), - msg_buf.size()); - return kNoTypeCastableMessage; // to cause a bad_any_cast + auto no_accessible_error_message = NoAccessibleMessageType( + fmt::format("The operator({}) does not have an input port with label " + "'{}'. 
It should be one of ({:.{}}) " + "in receive() method", + op_->name(), + input_name, + msg_buf.data(), + msg_buf.size())); + + return no_accessible_error_message; } } auto receiver = get_gxf_receiver(it->second); if (!receiver) { - return kNoTypeCastableMessage; // to cause a bad_any_cast + auto no_accessible_error_message = NoAccessibleMessageType( + fmt::format("Invalid receiver found for the input port with name {}", input_name)); + + return no_accessible_error_message; } auto entity = receiver->receive(); diff --git a/src/core/gxf/gxf_utils.cpp b/src/core/gxf/gxf_utils.cpp index 760ae48..29c4fc8 100644 --- a/src/core/gxf/gxf_utils.cpp +++ b/src/core/gxf/gxf_utils.cpp @@ -14,6 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include +#include #include #include @@ -75,4 +77,27 @@ gxf_uid_t add_entity_group(void* context, std::string name) { return entity_group_gid; } +uint64_t get_default_queue_policy() { + const char* env_value = std::getenv("HOLOSCAN_QUEUE_POLICY"); + if (env_value) { + std::string value{env_value}; + std::transform( + value.begin(), value.end(), value.begin(), [](unsigned char c) { return std::tolower(c); }); + if (value == "pop") { + return 0UL; + } else if (value == "reject") { + return 1UL; + } else if (value == "fail") { + return 2UL; + } else { + HOLOSCAN_LOG_ERROR( + "Unrecognized HOLOSCAN_QUEUE_POLICY: {}. It should be 'pop', 'reject' or 'fail'. 
Falling " + "back to default policy of 'fail'", + value); + return 2UL; + } + } + return 2UL; // fail +} + } // namespace holoscan::gxf diff --git a/src/core/gxf/gxf_wrapper.cpp b/src/core/gxf/gxf_wrapper.cpp index 61886f5..ef7dc7c 100644 --- a/src/core/gxf/gxf_wrapper.cpp +++ b/src/core/gxf/gxf_wrapper.cpp @@ -74,11 +74,9 @@ gxf_result_t GXFWrapper::tick() { InputContext* op_input = exec_context.input(); OutputContext* op_output = exec_context.output(); try { - if (op_->is_metadata_enabled()) { - // clear any existing values from a previous compute call - auto dynamic_metadata = op_->metadata(); - dynamic_metadata->clear(); - } + // clear any existing values from a previous compute call + op_->metadata()->clear(); + op_->compute(*op_input, *op_output, exec_context); // Note: output metadata is inserted via op_output.emit() rather than here } catch (const std::exception& e) { diff --git a/src/core/resources/gxf/double_buffer_receiver.cpp b/src/core/resources/gxf/double_buffer_receiver.cpp index 8832276..083b4ea 100644 --- a/src/core/resources/gxf/double_buffer_receiver.cpp +++ b/src/core/resources/gxf/double_buffer_receiver.cpp @@ -62,7 +62,8 @@ const char* DoubleBufferReceiver::gxf_typename() const { void DoubleBufferReceiver::setup(ComponentSpec& spec) { spec.param(capacity_, "capacity", "Capacity", "", 1UL); - spec.param(policy_, "policy", "Policy", "0: pop, 1: reject, 2: fault", 2UL); + auto default_policy = holoscan::gxf::get_default_queue_policy(); + spec.param(policy_, "policy", "Policy", "0: pop, 1: reject, 2: fault", default_policy); } void DoubleBufferReceiver::track() { diff --git a/src/core/resources/gxf/double_buffer_transmitter.cpp b/src/core/resources/gxf/double_buffer_transmitter.cpp index 42a3007..d17870f 100644 --- a/src/core/resources/gxf/double_buffer_transmitter.cpp +++ b/src/core/resources/gxf/double_buffer_transmitter.cpp @@ -62,7 +62,8 @@ const char* DoubleBufferTransmitter::gxf_typename() const { void 
DoubleBufferTransmitter::setup(ComponentSpec& spec) { spec.param(capacity_, "capacity", "Capacity", "", 1UL); - spec.param(policy_, "policy", "Policy", "0: pop, 1: reject, 2: fault", 2UL); + auto default_policy = holoscan::gxf::get_default_queue_policy(); + spec.param(policy_, "policy", "Policy", "0: pop, 1: reject, 2: fault", default_policy); } void DoubleBufferTransmitter::track() { diff --git a/src/core/resources/gxf/std_component_serializer.cpp b/src/core/resources/gxf/std_component_serializer.cpp index c399f1a..7ea0486 100644 --- a/src/core/resources/gxf/std_component_serializer.cpp +++ b/src/core/resources/gxf/std_component_serializer.cpp @@ -31,12 +31,19 @@ void StdComponentSerializer::setup(ComponentSpec& spec) { // } void StdComponentSerializer::initialize() { - // Set up prerequisite parameters before calling GXFOperator::initialize() - auto frag = fragment(); - auto allocator = frag->make_resource("std_component_serializer_allocator"); - allocator->gxf_cname(allocator->name().c_str()); - if (gxf_eid_ != 0) { allocator->gxf_eid(gxf_eid_); } - add_arg(Arg("allocator") = allocator); + // Add a default UnboundedAllocator if no allocator was provided + auto has_allocator = std::find_if( + args().begin(), args().end(), [](const auto& arg) { return (arg.name() == "allocator"); }); + if (has_allocator == args().end()) { + HOLOSCAN_LOG_TRACE("StdComponentSerializer: allocator argument not found, using default."); + auto frag = fragment(); + auto allocator = frag->make_resource("std_component_serializer_allocator"); + allocator->gxf_cname(allocator->name().c_str()); + if (gxf_eid_ != 0) { allocator->gxf_eid(gxf_eid_); } + add_arg(Arg("allocator") = allocator); + } else { + HOLOSCAN_LOG_TRACE("StdComponentSerializer: allocator argument found"); + } GXFResource::initialize(); } diff --git a/src/core/resources/gxf/std_entity_serializer.cpp b/src/core/resources/gxf/std_entity_serializer.cpp index dd71257..2a5bb20 100644 --- 
a/src/core/resources/gxf/std_entity_serializer.cpp +++ b/src/core/resources/gxf/std_entity_serializer.cpp @@ -45,12 +45,22 @@ nvidia::gxf::StdEntitySerializer* StdEntitySerializer::get() const { void StdEntitySerializer::initialize() { // Set up prerequisite parameters before calling GXFOperator::initialize() auto frag = fragment(); - auto component_serializer = - frag->make_resource("std_component_serializer"); - component_serializer->gxf_cname(component_serializer->name().c_str()); - if (gxf_eid_ != 0) { component_serializer->gxf_eid(gxf_eid_); } - add_arg(Arg("component_serializers") = - std::vector>{component_serializer}); + + auto has_component_serializers = std::find_if(args().begin(), args().end(), [](const auto& arg) { + return (arg.name() == "component_serializers"); + }); + if (has_component_serializers == args().end()) { + HOLOSCAN_LOG_TRACE( + "StdEntitySerializer: component_serializers argument not found, using default."); + auto component_serializer = + frag->make_resource("std_component_serializer"); + component_serializer->gxf_cname(component_serializer->name().c_str()); + if (gxf_eid_ != 0) { component_serializer->gxf_eid(gxf_eid_); } + add_arg(Arg("component_serializers") = + std::vector>{component_serializer}); + } else { + HOLOSCAN_LOG_TRACE("StdEntitySerializer: component_serializers argument found"); + } GXFResource::initialize(); } diff --git a/src/core/resources/gxf/ucx_receiver.cpp b/src/core/resources/gxf/ucx_receiver.cpp index aab6b14..6476c0b 100644 --- a/src/core/resources/gxf/ucx_receiver.cpp +++ b/src/core/resources/gxf/ucx_receiver.cpp @@ -24,6 +24,7 @@ #include "holoscan/core/component_spec.hpp" #include "holoscan/core/fragment.hpp" #include "holoscan/core/gxf/gxf_resource.hpp" +#include "holoscan/core/gxf/gxf_utils.hpp" #include "holoscan/core/resources/gxf/ucx_serialization_buffer.hpp" namespace holoscan { @@ -58,7 +59,8 @@ UcxReceiver::UcxReceiver(const std::string& name, nvidia::gxf::Receiver* compone void 
UcxReceiver::setup(ComponentSpec& spec) { HOLOSCAN_LOG_DEBUG("UcxReceiver::setup"); spec.param(capacity_, "capacity", "Capacity", "", 1UL); - spec.param(policy_, "policy", "Policy", "0: pop, 1: reject, 2: fault", 2UL); + auto default_policy = holoscan::gxf::get_default_queue_policy(); + spec.param(policy_, "policy", "Policy", "0: pop, 1: reject, 2: fault", default_policy); spec.param(address_, "address", "RX address", "RX address", std::string("0.0.0.0")); spec.param(port_, "port", "rx_port", "RX port", kDefaultUcxPort); spec.param(buffer_, "buffer", "Serialization Buffer", ""); diff --git a/src/core/resources/gxf/ucx_transmitter.cpp b/src/core/resources/gxf/ucx_transmitter.cpp index 6bda099..af73395 100644 --- a/src/core/resources/gxf/ucx_transmitter.cpp +++ b/src/core/resources/gxf/ucx_transmitter.cpp @@ -24,6 +24,7 @@ #include "holoscan/core/component_spec.hpp" #include "holoscan/core/fragment.hpp" #include "holoscan/core/gxf/gxf_resource.hpp" +#include "holoscan/core/gxf/gxf_utils.hpp" #include "holoscan/core/resources/gxf/ucx_receiver.hpp" // for kDefaultUcxPort #include "holoscan/core/resources/gxf/ucx_serialization_buffer.hpp" @@ -71,7 +72,8 @@ UcxTransmitter::UcxTransmitter(const std::string& name, nvidia::gxf::Transmitter void UcxTransmitter::setup(ComponentSpec& spec) { HOLOSCAN_LOG_DEBUG("UcxTransmitter::setup"); spec.param(capacity_, "capacity", "Capacity", "", 1UL); - spec.param(policy_, "policy", "Policy", "0: pop, 1: reject, 2: fault", 2UL); + auto default_policy = holoscan::gxf::get_default_queue_policy(); + spec.param(policy_, "policy", "Policy", "0: pop, 1: reject, 2: fault", default_policy); spec.param(receiver_address_, "receiver_address", "Receiver address", diff --git a/src/core/system/gpu_resource_monitor.cpp b/src/core/system/gpu_resource_monitor.cpp index e227e09..fe7fdb1 100644 --- a/src/core/system/gpu_resource_monitor.cpp +++ b/src/core/system/gpu_resource_monitor.cpp @@ -88,7 +88,7 @@ } \ } -#define HOLOSCAN_CUDA_CALL(stmt) \ +#define 
HOLOSCAN_CUDA_CALL_CHECK_HANDLE(stmt) \ ({ \ holoscan::cuda::cudaError_t _holoscan_cuda_err = -1; \ if (cuda_handle_ == nullptr) { \ @@ -113,23 +113,23 @@ _holoscan_cuda_err; \ }) -#define HOLOSCAN_CUDA_CALL_RETURN(stmt) \ - { \ - holoscan::cuda::cudaError_t _holoscan_cuda_err = HOLOSCAN_CUDA_CALL(stmt); \ - if (_holoscan_cuda_err != 0) { \ - shutdown_cuda_runtime(); \ - return; \ - } \ +#define HOLOSCAN_CUDA_CALL_RETURN(stmt) \ + { \ + holoscan::cuda::cudaError_t _holoscan_cuda_err = HOLOSCAN_CUDA_CALL_CHECK_HANDLE(stmt); \ + if (_holoscan_cuda_err != 0) { \ + shutdown_cuda_runtime(); \ + return; \ + } \ } -#define HOLOSCAN_CUDA_CALL_RETURN_VALUE_MSG(stmt, return_value, ...) \ - { \ - holoscan::cuda::cudaError_t _holoscan_cuda_err = HOLOSCAN_CUDA_CALL(stmt); \ - if (_holoscan_cuda_err != 0) { \ - HOLOSCAN_LOG_ERROR(__VA_ARGS__); \ - shutdown_cuda_runtime(); \ - return return_value; \ - } \ +#define HOLOSCAN_CUDA_CALL_RETURN_VALUE_MSG(stmt, return_value, ...) \ + { \ + holoscan::cuda::cudaError_t _holoscan_cuda_err = HOLOSCAN_CUDA_CALL_CHECK_HANDLE(stmt); \ + if (_holoscan_cuda_err != 0) { \ + HOLOSCAN_LOG_ERROR(__VA_ARGS__); \ + shutdown_cuda_runtime(); \ + return return_value; \ + } \ } namespace holoscan { diff --git a/src/core/system/network_utils.cpp b/src/core/system/network_utils.cpp index 7d70c50..ba3b670 100644 --- a/src/core/system/network_utils.cpp +++ b/src/core/system/network_utils.cpp @@ -18,8 +18,6 @@ #include // for getifaddrs() #include // for getaddrinfo(), gai_strerror(), freeaddrinfo(), addrinfo #include // for sockaddr_in -#include // for posix_spawnp() -#include // for waitpid() #include #include // for rand() #include // for memset() @@ -32,88 +30,23 @@ #include "holoscan/logger/logger.hpp" -/// Global variable that holds the environment variables -extern char** environ; - namespace holoscan { -//////////////////////////////////////////////////////////////////////////////////////////////////// -// The following snippet from CLI11 that is 
under the BSD-3-Clause license: -// https://github.com/CLIUtils/CLI11/blob/89601ee/include/CLI/impl/Argv_inl.hpp -// We have modified it to use Linux specific functions to get the command line arguments from -// /proc/self/cmdline. Please see https://github.com/CLIUtils/CLI11/pull/804. -// This is to avoid the use of `main()` and `argv` when spawning a child process. -//////////////////////////////////////////////////////////////////////////////////////////////////// -namespace cli11_detail { -static const std::vector& args() { - // This function uses initialization via lambdas extensively to take advantage of the thread - // safety of static variable initialization [stmt.dcl.3] - static const std::vector static_args = [] { - static const std::vector static_cmdline = [] { - // On posix, retrieve arguments from /proc/self/cmdline, separated by null terminators. - std::vector cmdline; - - auto deleter = [](FILE* f) { std::fclose(f); }; - std::unique_ptr fp_unique(std::fopen("/proc/self/cmdline", "r"), - deleter); - FILE* fp = fp_unique.get(); - if (!fp) { - throw std::runtime_error( - "could not open /proc/self/cmdline for reading"); // LCOV_EXCL_LINE - } - - size_t size = 0; - while (std::feof(fp) == 0) { - cmdline.resize(size + 128); - size += std::fread(cmdline.data() + size, 1, 128, fp); - - if (std::ferror(fp) != 0) { - throw std::runtime_error("error during reading /proc/self/cmdline"); // LCOV_EXCL_LINE - } - } - cmdline.resize(size); - - return cmdline; - }(); - - std::size_t argc = - static_cast(std::count(static_cmdline.begin(), static_cmdline.end(), '\0')); - std::vector static_args_result; - static_args_result.reserve(argc); - - for (auto it = static_cmdline.begin(); it != static_cmdline.end(); - it = std::find(it, static_cmdline.end(), '\0') + 1) { - static_args_result.push_back(static_cmdline.data() + (it - static_cmdline.begin())); - } - - return static_args_result; - }(); - - return static_args; -} -} // namespace cli11_detail - -static const char* 
const* argv() { - return cli11_detail::args().data(); -} - -static int argc() { - return static_cast(cli11_detail::args().size()); -} -//////////////////////////////////////////////////////////////////////////////////////////////////// - class Socket { public: explicit Socket(int domain, int type, int protocol) : sockfd_(socket(domain, type, protocol)) { if (sockfd_ < 0) { throw std::runtime_error("Error creating socket"); } } - ~Socket() { close(sockfd_); } + ~Socket() { + if (sockfd_ >= 0) { close(sockfd_); } + sockfd_ = -1; + } int descriptor() const { return sockfd_; } private: - int sockfd_; + int sockfd_ = -1; }; class AddrInfo { @@ -134,48 +67,6 @@ class AddrInfo { addrinfo* res_; }; -static ssize_t write_all(int fd, const void* buffer, size_t count) { - const char* buf_ptr = reinterpret_cast(buffer); - size_t bytes_left = count; - ssize_t total_bytes_written = 0; - - while (bytes_left > 0) { - ssize_t bytes_written = write(fd, buf_ptr, bytes_left); - if (bytes_written < 0) { - if (errno == EINTR) continue; // Retry if interrupted by signal - throw std::runtime_error( - fmt::format("Error writing to fd {}: {} ({})", fd, std::strerror(errno), errno)); - } - if (bytes_written == 0) { throw std::runtime_error("Unexpected zero write"); } - total_bytes_written += bytes_written; - bytes_left -= bytes_written; - buf_ptr += bytes_written; - } - - return total_bytes_written; -} - -static ssize_t read_all(int fd, void* buffer, size_t count) { - char* buf_ptr = reinterpret_cast(buffer); - size_t bytes_left = count; - ssize_t total_bytes_read = 0; - - while (bytes_left > 0) { - ssize_t bytes_read = read(fd, buf_ptr, bytes_left); - if (bytes_read < 0) { - if (errno == EINTR) continue; // Retry if interrupted by signal - throw std::runtime_error( - fmt::format("Error reading from fd {}: {} ({})", fd, std::strerror(errno), errno)); - } - if (bytes_read == 0) { throw std::runtime_error("Unexpected EOF while reading"); } - total_bytes_read += bytes_read; - bytes_left -= 
bytes_read; - buf_ptr += bytes_read; - } - - return total_bytes_read; -} - static bool is_port_available(int port) { struct addrinfo hints {}; // Set up the hints structure @@ -210,10 +101,9 @@ static bool is_port_available(int port) { return false; } -static std::vector get_unused_network_ports_impl(uint32_t num_ports, uint32_t min_port, - uint32_t max_port, - const std::vector& used_ports, - const std::vector& prefer_ports) { +std::vector get_unused_network_ports(uint32_t num_ports, uint32_t min_port, uint32_t max_port, + const std::vector& used_ports, + const std::vector& prefer_ports) { // Add exclude ports to the set std::unordered_set used_port_set(used_ports.begin(), used_ports.end()); used_port_set.reserve(num_ports + used_ports.size()); @@ -242,324 +132,10 @@ static std::vector get_unused_network_ports_impl(uint32_t num_ports, uint32 try_insert_port(port); } - return unused_ports; -} - -class Pipe { - public: - Pipe() { - if (pipe(fd_) == -1) { throw std::runtime_error("Error creating pipe"); } - } - explicit Pipe(int fd0, int fd1) : fd_{fd0, fd1} {} - - ~Pipe() { - if (fd_[0] != -1) { close(fd_[0]); } - if (fd_[1] != -1) { close(fd_[1]); } - } - - int read_fd() const { return fd_[0]; } - int write_fd() const { return fd_[1]; } - - private: - int fd_[2]; -}; - -class PosixSpawnAttr { - public: - PosixSpawnAttr() { - if (posix_spawnattr_init(&attr_) != 0) { - throw std::runtime_error("Error initializing posix_spawnattr_t"); - } - } - - ~PosixSpawnAttr() { posix_spawnattr_destroy(&attr_); } - - posix_spawnattr_t* get() { return &attr_; } + HOLOSCAN_LOG_DEBUG( + "unused_ports={} (size:{})", fmt::join(unused_ports, ","), unused_ports.size()); - private: - posix_spawnattr_t attr_; -}; - -class PosixSpawnFileActions { - public: - PosixSpawnFileActions() { - if (posix_spawn_file_actions_init(&actions_) != 0) { - throw std::runtime_error("Error initializing posix_spawn_file_actions_t"); - } - } - - ~PosixSpawnFileActions() { 
posix_spawn_file_actions_destroy(&actions_); } - - posix_spawn_file_actions_t* get() { return &actions_; } - - private: - posix_spawn_file_actions_t actions_; -}; - -namespace { - -/** - * @brief Initializes the network port search process in a child process. - * - * The Initializer class is responsible for setting up and initializing - * the process of searching for unused network ports. It will check if - * it's running in a child process context, setup logging, read parameters - * from a pipe, execute the search for unused network ports, and send the - * results back to the parent process through a pipe. - * - * A static object of this class is created to ensure its constructor is - * called before main() starts, and its destructor is called after main() ends. - */ -class Initializer { - public: - /** - * @brief Constructor of the Initializer class. - * - * The constructor checks if it's running in a child process context by - * checking environment variables. If it is in a child process context, - * it sets up logging, reads parameters from the parent process through a - * pipe, finds unused network ports, and sends the results back to the - * parent process through a pipe. 
- */ - Initializer() { - // Check if this is the child process - const char* is_child_process = std::getenv("HOLOSCAN_UNUSED_IP_CHILD_PROCESS"); - if (is_child_process == nullptr || std::strncmp(is_child_process, "1", 1) != 0) { return; } - - HOLOSCAN_LOG_DEBUG("Child process started to find unused network ports"); - - // Enable logging - holoscan::set_log_level(LogLevel::INFO); - holoscan::set_log_pattern(); - - try { - // Get the pipe file descriptors from the environment variables - char* parent_to_child_read_fd_str = std::getenv("HOLOSCAN_UNUSED_IP_PARENT_TO_CHILD_READ_FD"); - char* child_to_parent_write_fd_str = - std::getenv("HOLOSCAN_UNUSED_IP_CHILD_TO_PARENT_WRITE_FD"); - if (parent_to_child_read_fd_str == nullptr || child_to_parent_write_fd_str == nullptr) { - HOLOSCAN_LOG_ERROR("Error getting pipe file descriptors from environment variables"); - exit(1); - } - Pipe parent_to_child(std::stoi(parent_to_child_read_fd_str), -1); - Pipe child_to_parent(-1, std::stoi(child_to_parent_write_fd_str)); - - // Deserialize parameters from parent process - - uint32_t num_ports, min_port, max_port; - size_t used_ports_size; - std::vector used_ports; - size_t prefer_ports_size; - std::vector prefer_ports; - - // Read scalar values - ssize_t bytes_read = read(parent_to_child.read_fd(), &num_ports, sizeof(num_ports)); - bytes_read += read(parent_to_child.read_fd(), &min_port, sizeof(min_port)); - bytes_read += read(parent_to_child.read_fd(), &max_port, sizeof(max_port)); - - // Read vector values - bytes_read += read(parent_to_child.read_fd(), - &used_ports_size, - sizeof(used_ports_size)); // Reading the size of the vector - used_ports.resize(used_ports_size); - bytes_read += - read_all(parent_to_child.read_fd(), used_ports.data(), used_ports_size * sizeof(int)); - - bytes_read += read(parent_to_child.read_fd(), - &prefer_ports_size, - sizeof(prefer_ports_size)); // Reading the size of the vector - prefer_ports.resize(prefer_ports_size); - bytes_read += - 
read_all(parent_to_child.read_fd(), prefer_ports.data(), prefer_ports_size * sizeof(int)); - - if (bytes_read != - static_cast(sizeof(num_ports) + sizeof(min_port) + sizeof(max_port) + - sizeof(used_ports_size) + used_ports_size * sizeof(int) + - sizeof(prefer_ports_size) + prefer_ports_size * sizeof(int))) { - throw std::runtime_error( - fmt::format("Unable to read port parameters from pipe (fd={}): {} ({})", - parent_to_child.read_fd(), - std::strerror(errno), - errno)); - } - - HOLOSCAN_LOG_DEBUG("num_ports={}, min_port={}, max_port={}, used_ports={}, prefer_ports={}", - num_ports, - min_port, - max_port, - fmt::join(used_ports, ","), - fmt::join(prefer_ports, ",")); - - std::vector unused_ports = - get_unused_network_ports_impl(num_ports, min_port, max_port, used_ports, prefer_ports); - - // Serialize results to the parent process using the child_to_parent pipe - - int unused_ports_size = unused_ports.size(); - ssize_t bytes_written = write(child_to_parent.write_fd(), - &unused_ports_size, - sizeof(unused_ports_size)); // Writing the size of the vector - bytes_written += write_all( - child_to_parent.write_fd(), unused_ports.data(), unused_ports_size * sizeof(int)); - - if (bytes_written != - static_cast(sizeof(unused_ports_size) + unused_ports_size * sizeof(int))) { - throw std::runtime_error( - fmt::format("Unable to write unused port info to pipe (fd={}): {} ({})", - child_to_parent.write_fd(), - std::strerror(errno), - errno)); - } - - // Exit with success - exit(0); - } catch (const std::exception& e) { - HOLOSCAN_LOG_ERROR("Error in child process: {}", e.what()); - } - // Exit with error - exit(1); - } -}; - -/// Create a static object of the class -/// Its constructor will be called before main() starts, -/// and its destructor will be called after main() ends. 
-static Initializer initializer; - -} // namespace - -std::vector get_unused_network_ports(uint32_t num_ports, uint32_t min_port, uint32_t max_port, - const std::vector& used_ports, - const std::vector& prefer_ports) { - // Note:: Since opening and closing sockets makes the open port unavailable for a while, we use a - // child process to parallelize the process of finding unused ports. The child process writes - // the unused ports to a pipe that the parent process reads from. - - // Create two pipes: one for parent-to-child communication, and one for child-to-parent - // communication - try { - Pipe parent_to_child_pipe; - Pipe child_to_parent_pipe; - - PosixSpawnAttr attr; - PosixSpawnFileActions action; - - // Set the file descriptors to be closed in the child process - posix_spawn_file_actions_addclose(action.get(), parent_to_child_pipe.write_fd()); - posix_spawn_file_actions_addclose(action.get(), child_to_parent_pipe.read_fd()); - - // Copy argv() to args. - char* args[argc() + 1]{}; - for (int i = 0; i < argc(); ++i) { args[i] = const_cast(argv()[i]); } - args[argc()] = nullptr; // the last element must be a nullptr - - // Get current environment variables - char** env_vars = environ; - - // Determine the number of current environment variables - int env_count = 0; - while (env_vars[env_count] != nullptr) { env_count++; } - - // Create a vector to hold the current environment variables plus the new one - std::vector local_env_vars; - local_env_vars.reserve(env_count + 5); - for (int i = 0; i < env_count; i++) { local_env_vars.push_back(std::string(env_vars[i])); } - - // Add the new environment variable to the local vector - local_env_vars.push_back("HOLOSCAN_UNUSED_IP_CHILD_PROCESS=1"); - - // Update the environment variable setup to include the file descriptors for both pipes - local_env_vars.push_back(fmt::format("HOLOSCAN_UNUSED_IP_PARENT_TO_CHILD_READ_FD={}", - parent_to_child_pipe.read_fd())); - 
local_env_vars.push_back(fmt::format("HOLOSCAN_UNUSED_IP_CHILD_TO_PARENT_WRITE_FD={}", - child_to_parent_pipe.write_fd())); - - // Convert the local vector to a char** array - std::vector local_env_ptrs; - local_env_ptrs.reserve(local_env_vars.size() + 1); - for (auto& str : local_env_vars) { local_env_ptrs.push_back(&str[0]); } - local_env_ptrs.push_back(nullptr); // Null-terminate the array - - // Now local_env_ptrs is a char** array with the original environment variables plus the new - // ones - char** new_env_vars = &local_env_ptrs[0]; - - pid_t pid; - int status = posix_spawnp(&pid, args[0], action.get(), attr.get(), args, new_env_vars); - if (status != 0) { - throw std::runtime_error( - fmt::format("Unable to spawn child process: {} ({})", std::strerror(status), status)); - } else { - // Parent process - HOLOSCAN_LOG_DEBUG("Child process spawned with pid {}", pid); - - // Serialize parameters to send to child process - - size_t used_ports_size = used_ports.size(); - size_t prefer_ports_size = prefer_ports.size(); - - // Write scalar values - ssize_t bytes_written = write(parent_to_child_pipe.write_fd(), &num_ports, sizeof(num_ports)); - bytes_written += write(parent_to_child_pipe.write_fd(), &min_port, sizeof(min_port)); - bytes_written += write(parent_to_child_pipe.write_fd(), &max_port, sizeof(max_port)); - - // Write vector values - bytes_written += write(parent_to_child_pipe.write_fd(), - &used_ports_size, - sizeof(used_ports_size)); // Writing the size of the vector - bytes_written += write_all( - parent_to_child_pipe.write_fd(), used_ports.data(), used_ports_size * sizeof(int)); - - bytes_written += write(parent_to_child_pipe.write_fd(), - &prefer_ports_size, - sizeof(prefer_ports_size)); // Writing the size of the vector - bytes_written += write_all( - parent_to_child_pipe.write_fd(), prefer_ports.data(), prefer_ports_size * sizeof(int)); - - if (bytes_written != - static_cast(sizeof(num_ports) + sizeof(min_port) + sizeof(max_port) + - 
sizeof(used_ports_size) + used_ports_size * sizeof(int) + - sizeof(prefer_ports_size) + prefer_ports_size * sizeof(int))) { - throw std::runtime_error( - fmt::format("Unable to write port parameters to pipe (fd={}): {} ({})", - parent_to_child_pipe.write_fd(), - std::strerror(errno), - errno)); - } - - // Read the unused ports from the child process - - int unused_ports_size = 0; - - ssize_t bytes_read = - read(child_to_parent_pipe.read_fd(), &unused_ports_size, sizeof(unused_ports_size)); - - std::vector unused_ports(unused_ports_size); - bytes_read += read_all( - child_to_parent_pipe.read_fd(), unused_ports.data(), unused_ports_size * sizeof(int)); - if (bytes_read != - static_cast(sizeof(unused_ports_size) + unused_ports_size * sizeof(int))) { - throw std::runtime_error( - fmt::format("Unable to read unused ports from pipe (fd={}): {} ({})", - child_to_parent_pipe.read_fd(), - std::strerror(errno), - errno)); - } - - HOLOSCAN_LOG_DEBUG( - "unused_ports={} (size:{})", fmt::join(unused_ports, ","), unused_ports.size()); - - int exit_code; - waitpid(pid, &exit_code, 0); - HOLOSCAN_LOG_DEBUG( - "Child process exited with code {} ('{}')", exit_code, strerror(exit_code)); - - return unused_ports; - } - } catch (const std::exception& e) { - HOLOSCAN_LOG_ERROR("Error spawning child process: {}", e.what()); - } - return {}; + return unused_ports; } std::vector get_preferred_network_ports(const char* env_var_name) { diff --git a/src/operators/CMakeLists.txt b/src/operators/CMakeLists.txt index 228b9da..173c0e7 100644 --- a/src/operators/CMakeLists.txt +++ b/src/operators/CMakeLists.txt @@ -25,6 +25,8 @@ add_subdirectory(inference) add_subdirectory(inference_processor) add_subdirectory(ping_rx) add_subdirectory(ping_tx) +add_subdirectory(ping_tensor_rx) +add_subdirectory(ping_tensor_tx) add_subdirectory(segmentation_postprocessor) add_subdirectory(v4l2_video_capture) add_subdirectory(video_stream_recorder) diff --git a/src/operators/aja_source/aja_source.cpp 
b/src/operators/aja_source/aja_source.cpp index b1496d5..deaa810 100644 --- a/src/operators/aja_source/aja_source.cpp +++ b/src/operators/aja_source/aja_source.cpp @@ -49,6 +49,7 @@ void AJASourceOp::setup(OperatorSpec& spec) { constexpr uint32_t kDefaultWidth = 1920; constexpr uint32_t kDefaultHeight = 1080; constexpr uint32_t kDefaultFramerate = 60; + constexpr bool kDefaultInterlaced = false; constexpr bool kDefaultRDMA = false; constexpr bool kDefaultEnableOverlay = false; constexpr bool kDefaultOverlayRDMA = false; @@ -65,6 +66,7 @@ void AJASourceOp::setup(OperatorSpec& spec) { spec.param(width_, "width", "Width", "Width of the stream.", kDefaultWidth); spec.param(height_, "height", "Height", "Height of the stream.", kDefaultHeight); spec.param(framerate_, "framerate", "Framerate", "Framerate of the stream.", kDefaultFramerate); + spec.param(interlaced_, "interlaced", "Interlaced", "Interlaced or not.", kDefaultInterlaced); spec.param(use_rdma_, "rdma", "RDMA", "Enable RDMA.", kDefaultRDMA); spec.param( enable_overlay_, "enable_overlay", "EnableOverlay", "Enable overlay.", kDefaultEnableOverlay); @@ -88,15 +90,85 @@ void AJASourceOp::setup(OperatorSpec& spec) { } AJAStatus AJASourceOp::DetermineVideoFormat() { - if (width_ == 1920 && height_ == 1080 && framerate_ == 60) { - video_format_ = NTV2_FORMAT_1080p_6000_A; - } else if (width_ == 3840 && height_ == 2160 && framerate_ == 60) { - video_format_ = NTV2_FORMAT_3840x2160p_6000; + video_format_ = NTV2_FORMAT_UNKNOWN; + + if (interlaced_) { + if (width_ == 1920 && height_ == 1080) { + if (framerate_ == 50) { + video_format_ = NTV2_FORMAT_1080i_5000; + } else if (framerate_ == 59) { + video_format_ = NTV2_FORMAT_1080i_5994; + } else if (framerate_ == 60) { + video_format_ = NTV2_FORMAT_1080i_6000; + } + } } else { - return AJA_STATUS_UNSUPPORTED; + if (width_ == 1280 && height_ == 720) { + if (framerate_ == 50) { + video_format_ = NTV2_FORMAT_720p_5000; + } else if (framerate_ == 59) { + video_format_ = 
NTV2_FORMAT_720p_5994; + } else if (framerate_ == 60) { + video_format_ = NTV2_FORMAT_720p_6000; + } + } else if (width_ == 1920 && height_ == 1080) { + if (framerate_ == 23) { + video_format_ = NTV2_FORMAT_1080p_2398; + } else if (framerate_ == 24) { + video_format_ = NTV2_FORMAT_1080p_2400; + } else if (framerate_ == 25) { + video_format_ = NTV2_FORMAT_1080p_2500; + } else if (framerate_ == 29) { + video_format_ = NTV2_FORMAT_1080p_2997; + } else if (framerate_ == 30) { + video_format_ = NTV2_FORMAT_1080p_3000; + } else if (framerate_ == 50) { + video_format_ = NTV2_FORMAT_1080p_5000_A; + } else if (framerate_ == 59) { + video_format_ = NTV2_FORMAT_1080p_5994_A; + } else if (framerate_ == 60) { + video_format_ = NTV2_FORMAT_1080p_6000_A; + } + } else if (width_ == 3840 && height_ == 2160) { + if (framerate_ == 23) { + video_format_ = NTV2_FORMAT_3840x2160p_2398; + } else if (framerate_ == 24) { + video_format_ = NTV2_FORMAT_3840x2160p_2400; + } else if (framerate_ == 25) { + video_format_ = NTV2_FORMAT_3840x2160p_2500; + } else if (framerate_ == 29) { + video_format_ = NTV2_FORMAT_3840x2160p_2997; + } else if (framerate_ == 30) { + video_format_ = NTV2_FORMAT_3840x2160p_3000; + } else if (framerate_ == 50) { + video_format_ = NTV2_FORMAT_3840x2160p_5000; + } else if (framerate_ == 59) { + video_format_ = NTV2_FORMAT_3840x2160p_5994; + } else if (framerate_ == 60) { + video_format_ = NTV2_FORMAT_3840x2160p_6000; + } + } else if (width_ == 4096 && height_ == 2160) { + if (framerate_ == 23) { + video_format_ = NTV2_FORMAT_4096x2160p_2398; + } else if (framerate_ == 24) { + video_format_ = NTV2_FORMAT_4096x2160p_2400; + } else if (framerate_ == 25) { + video_format_ = NTV2_FORMAT_4096x2160p_2500; + } else if (framerate_ == 29) { + video_format_ = NTV2_FORMAT_4096x2160p_2997; + } else if (framerate_ == 30) { + video_format_ = NTV2_FORMAT_4096x2160p_3000; + } else if (framerate_ == 50) { + video_format_ = NTV2_FORMAT_4096x2160p_5000; + } else if (framerate_ == 59) { + 
video_format_ = NTV2_FORMAT_4096x2160p_5994; + } else if (framerate_ == 60) { + video_format_ = NTV2_FORMAT_4096x2160p_6000; + } + } } - return AJA_STATUS_SUCCESS; + return (video_format_ == NTV2_FORMAT_UNKNOWN) ? AJA_STATUS_UNSUPPORTED : AJA_STATUS_SUCCESS; } AJAStatus AJASourceOp::OpenDevice() { @@ -383,7 +455,19 @@ void AJASourceOp::start() { cudaGetDeviceProperties(&prop, 0); is_igpu_ = prop.integrated; - HOLOSCAN_LOG_INFO("AJA Source: Capturing from NTV2_CHANNEL{}", (channel_.get() + 1)); + float framerate; + if (framerate_ == 23) { + framerate = 23.98f; + } else if (framerate_ == 29) { + framerate = 29.97f; + } else if (framerate_ == 59) { + framerate = 59.94f; + } else { + framerate = framerate_; + } + HOLOSCAN_LOG_INFO("AJA Source: Capturing {}x{}@{}Hz {}from NTV2_CHANNEL{}", + width_, height_, framerate, (interlaced_ ? "(interlaced) " : ""), + (channel_.get() + 1)); HOLOSCAN_LOG_INFO("AJA Source: RDMA is {}", use_rdma_ ? "enabled" : "disabled"); if (enable_overlay_) { HOLOSCAN_LOG_INFO("AJA Source: Outputting overlay to NTV2_CHANNEL{}", @@ -395,7 +479,7 @@ void AJASourceOp::start() { AJAStatus status = DetermineVideoFormat(); if (AJA_FAILURE(status)) { - throw std::runtime_error("Video format could not be determined based on parameters."); + throw std::runtime_error("Video format could not be determined or is not supported."); } status = OpenDevice(); @@ -444,7 +528,7 @@ void AJASourceOp::compute(InputContext& op_input, OutputContext& op_output, // Update the next input frame and wait until it starts. uint32_t next_hw_frame = (current_hw_frame_ + 1) % 2; device_.SetInputFrame(channel_, next_hw_frame); - device_.WaitForInputVerticalInterrupt(channel_); + device_.WaitForInputFieldID(NTV2_FIELD0, channel_); // Read the last completed frame. 
auto size = GetVideoWriteSize(video_format_, pixel_format_); diff --git a/src/operators/bayer_demosaic/bayer_demosaic.cpp b/src/operators/bayer_demosaic/bayer_demosaic.cpp index 15f6e63..67f64a2 100644 --- a/src/operators/bayer_demosaic/bayer_demosaic.cpp +++ b/src/operators/bayer_demosaic/bayer_demosaic.cpp @@ -33,20 +33,7 @@ #include "holoscan/core/operator_spec.hpp" #include "holoscan/core/resources/gxf/allocator.hpp" #include "holoscan/core/resources/gxf/cuda_stream_pool.hpp" - -#define CUDA_TRY(stmt) \ - ({ \ - cudaError_t _holoscan_cuda_err = stmt; \ - if (cudaSuccess != _holoscan_cuda_err) { \ - GXF_LOG_ERROR("CUDA Runtime call %s in line %d of file %s failed with '%s' (%d).\n", \ - #stmt, \ - __LINE__, \ - __FILE__, \ - cudaGetErrorString(_holoscan_cuda_err), \ - _holoscan_cuda_err); \ - } \ - _holoscan_cuda_err; \ - }) +#include "holoscan/utils/cuda_macros.hpp" namespace holoscan::ops { @@ -201,10 +188,10 @@ void BayerDemosaicOp::compute(InputContext& op_input, OutputContext& op_output, } } - CUDA_TRY(cudaMemcpy(static_cast(device_scratch_buffer_.pointer()), - static_cast(frame->pointer()), - buffer_size, - cudaMemcpyHostToDevice)); + HOLOSCAN_CUDA_CALL(cudaMemcpy(static_cast(device_scratch_buffer_.pointer()), + static_cast(frame->pointer()), + buffer_size, + cudaMemcpyHostToDevice)); input_data_ptr = device_scratch_buffer_.pointer(); } } else { @@ -221,11 +208,38 @@ void BayerDemosaicOp::compute(InputContext& op_input, OutputContext& op_output, // cast Holoscan::Tensor to nvidia::gxf::Tensor so attribute access code can remain as-is nvidia::gxf::Tensor in_tensor_gxf{in_tensor->dl_ctx()}; auto in_rank = in_tensor_gxf.rank(); - if (in_rank != 3) { + in_shape = in_tensor_gxf.shape(); + if (in_rank < 2 || in_rank > 3) { + throw std::runtime_error(fmt::format( + "Input tensor has {} dimensions. 
Expected a tensor with two (HW) or three (HWC) " + "dimensions corresponding to a 1 channel 8-bit or 16-bit unsigned packed CFA grayscale " + "Bayer pattern.", + in_rank)); + } else { + if (in_rank == 3) { + in_channels = in_shape.dimension(2); + if (in_channels != 1) { + throw std::runtime_error(fmt::format( + "For 3D HWC input, the number of channels, C, must be 1. Detected {} channels.", + in_channels)); + } + } else { + in_channels = 1; + } + } + rows = in_shape.dimension(0); + if (rows % 2 != 0) { throw std::runtime_error( - fmt::format("Input tensor has {} dimensions. Expected a tensor with 3 dimensions " - "(corresponding to an RGB or RGBA image).", - in_rank)); + fmt::format("Input tensor must have an even number of rows corresponding to a packed " + "CFA grayscale Bayer pattern. Detected {} rows.", + rows)); + } + columns = in_shape.dimension(1); + if (columns % 2 != 0) { + throw std::runtime_error( + fmt::format("Input tensor must have an even number of columns corresponding to a packed " + "CFA grayscale Bayer pattern. Detected {} columns.", + columns)); } DLDevice dev = in_tensor->device(); @@ -264,10 +278,10 @@ void BayerDemosaicOp::compute(InputContext& op_input, OutputContext& op_output, } } - CUDA_TRY(cudaMemcpy(static_cast(device_scratch_buffer_.pointer()), - static_cast(in_tensor_gxf.pointer()), - buffer_size, - cudaMemcpyHostToDevice)); + HOLOSCAN_CUDA_CALL(cudaMemcpy(static_cast(device_scratch_buffer_.pointer()), + static_cast(in_tensor_gxf.pointer()), + buffer_size, + cudaMemcpyHostToDevice)); input_data_ptr = device_scratch_buffer_.pointer(); } else { input_data_ptr = in_tensor_gxf.pointer(); @@ -277,10 +291,6 @@ void BayerDemosaicOp::compute(InputContext& op_input, OutputContext& op_output, // This should never happen, but just in case... HOLOSCAN_LOG_ERROR("Unable to get tensor data pointer. 
nullptr returned."); } - in_shape = in_tensor_gxf.shape(); - rows = in_shape.dimension(0); - columns = in_shape.dimension(1); - in_channels = in_shape.dimension(2); element_type = in_tensor_gxf.element_type(); element_size = nvidia::gxf::PrimitiveTypeSize(element_type); input_memory_type = in_tensor_gxf.storage_type(); diff --git a/src/operators/format_converter/format_converter.cpp b/src/operators/format_converter/format_converter.cpp index 6ccf8ab..e401d5d 100644 --- a/src/operators/format_converter/format_converter.cpp +++ b/src/operators/format_converter/format_converter.cpp @@ -32,20 +32,7 @@ #include "holoscan/core/operator_spec.hpp" #include "holoscan/core/resources/gxf/allocator.hpp" #include "holoscan/core/resources/gxf/cuda_stream_pool.hpp" - -#define CUDA_TRY(stmt) \ - ({ \ - cudaError_t _holoscan_cuda_err = stmt; \ - if (cudaSuccess != _holoscan_cuda_err) { \ - HOLOSCAN_LOG_ERROR("CUDA Runtime call {} in line {} of file {} failed with '{}' ({}).\n", \ - #stmt, \ - __LINE__, \ - __FILE__, \ - cudaGetErrorString(_holoscan_cuda_err), \ - _holoscan_cuda_err); \ - } \ - _holoscan_cuda_err; \ - }) +#include "holoscan/utils/cuda_macros.hpp" namespace { @@ -348,10 +335,10 @@ void FormatConverterOp::compute(InputContext& op_input, OutputContext& op_output fmt::format("Failed to allocate device scratch buffer ({} bytes)", buffer_size)); } } - CUDA_TRY(cudaMemcpy(device_scratch_buffer_->pointer(), - frame->pointer(), - buffer_size, - cudaMemcpyHostToDevice)); + HOLOSCAN_CUDA_CALL(cudaMemcpy(device_scratch_buffer_->pointer(), + frame->pointer(), + buffer_size, + cudaMemcpyHostToDevice)); in_tensor_data = device_scratch_buffer_->pointer(); in_memory_storage_type = nvidia::gxf::MemoryStorageType::kDevice; } @@ -418,10 +405,10 @@ void FormatConverterOp::compute(InputContext& op_input, OutputContext& op_output } } - CUDA_TRY(cudaMemcpy(static_cast(device_scratch_buffer_->pointer()), - static_cast(in_tensor_gxf.pointer()), - buffer_size, - cudaMemcpyHostToDevice)); + 
HOLOSCAN_CUDA_CALL(cudaMemcpy(static_cast(device_scratch_buffer_->pointer()), + static_cast(in_tensor_gxf.pointer()), + buffer_size, + cudaMemcpyHostToDevice)); in_tensor_data = device_scratch_buffer_->pointer(); in_memory_storage_type = nvidia::gxf::MemoryStorageType::kDevice; } @@ -669,14 +656,15 @@ void FormatConverterOp::convertTensorFormat( const auto in_tensor_ptr = static_cast(in_tensor_data); auto out_tensor_ptr = static_cast(out_tensor_data); - cudaError_t cuda_status = CUDA_TRY(cudaMemcpy2DAsync(out_tensor_ptr, - dst_step, - in_tensor_ptr, - src_step, - columns * out_channels * dst_typesize, - rows, - cudaMemcpyDeviceToDevice, - npp_stream_ctx_.hStream)); + cudaError_t cuda_status = + HOLOSCAN_CUDA_CALL(cudaMemcpy2DAsync(out_tensor_ptr, + dst_step, + in_tensor_ptr, + src_step, + columns * out_channels * dst_typesize, + rows, + cudaMemcpyDeviceToDevice, + npp_stream_ctx_.hStream)); if (cuda_status) { throw std::runtime_error("Failed to copy GPU data to GPU memory."); } status = NPP_SUCCESS; break; diff --git a/src/operators/holoviz/buffer_info.cpp b/src/operators/holoviz/buffer_info.cpp index 233118f..a25c161 100644 --- a/src/operators/holoviz/buffer_info.cpp +++ b/src/operators/holoviz/buffer_info.cpp @@ -64,6 +64,16 @@ component_and_swizzle(HolovizOp::ImageFormat image_format) { case HolovizOp::ImageFormat::R8G8B8_UNORM: case HolovizOp::ImageFormat::R8G8B8_SNORM: case HolovizOp::ImageFormat::R8G8B8_SRGB: + case HolovizOp::ImageFormat::Y8U8Y8V8_422_UNORM: + case HolovizOp::ImageFormat::U8Y8V8Y8_422_UNORM: + case HolovizOp::ImageFormat::Y8_U8V8_2PLANE_420_UNORM: + case HolovizOp::ImageFormat::Y8_U8V8_2PLANE_422_UNORM: + case HolovizOp::ImageFormat::Y8_U8_V8_3PLANE_420_UNORM: + case HolovizOp::ImageFormat::Y8_U8_V8_3PLANE_422_UNORM: + case HolovizOp::ImageFormat::Y16_U16V16_2PLANE_420_UNORM: + case HolovizOp::ImageFormat::Y16_U16V16_2PLANE_422_UNORM: + case HolovizOp::ImageFormat::Y16_U16_V16_3PLANE_420_UNORM: + case 
HolovizOp::ImageFormat::Y16_U16_V16_3PLANE_422_UNORM: components = 3; component_swizzle[0] = viz::ComponentSwizzle::IDENTITY; component_swizzle[1] = viz::ComponentSwizzle::IDENTITY; @@ -346,6 +356,64 @@ gxf_result_t BufferInfo::init(const nvidia::gxf::Handle, 31> +static const std::array, 41> kImageFormatToStr{ {{holoscan::ops::HolovizOp::ImageFormat::AUTO_DETECT, "auto_detect"}, {holoscan::ops::HolovizOp::ImageFormat::R8_UINT, "r8_uint"}, @@ -150,7 +137,25 @@ static const std::array, 3> + kYuvModelConversionToStr{ + {{holoscan::ops::HolovizOp::YuvModelConversion::YUV_601, "yuv_601"}, + {holoscan::ops::HolovizOp::YuvModelConversion::YUV_709, "yuv_709"}, + {holoscan::ops::HolovizOp::YuvModelConversion::YUV_2020, "yuv_2020"}}}; + +/** + * Convert a string to a yuv model conversion enum + * + * @param string yuv model conversion string + * @return yuv model conversion enum + */ +static nvidia::gxf::Expected +yuvModelConversionFromString(const std::string& string) { + const auto it = std::find_if(std::cbegin(kYuvModelConversionToStr), + std::cend(kYuvModelConversionToStr), + [&string](const auto& v) { return v.second == string; }); + if (it != std::cend(kYuvModelConversionToStr)) { return it->first; } + + HOLOSCAN_LOG_ERROR("Unsupported yuv model conversion '{}'", string); + return nvidia::gxf::Unexpected{GXF_FAILURE}; +} + +/** + * Convert a yuv model conversion enum to a string + * + * @param yuv_model_conversion yuv model conversion enum + * @return yuv model conversion string + */ +static std::string yuvModelConversionToString( + holoscan::ops::HolovizOp::YuvModelConversion yuv_model_conversion) { + const auto it = std::find_if( + std::cbegin(kYuvModelConversionToStr), + std::cend(kYuvModelConversionToStr), + [&yuv_model_conversion](const auto& v) { return v.first == yuv_model_conversion; }); + if (it != std::cend(kYuvModelConversionToStr)) { return it->second; } + + return "invalid"; +} + +/// table to convert yuv range enum to string +static const std::array, 2>
+ kYuvRangeToStr{{{holoscan::ops::HolovizOp::YuvRange::ITU_FULL, "itu_full"}, + {holoscan::ops::HolovizOp::YuvRange::ITU_NARROW, "itu_narrow"}}}; + +/** + * Convert a string to a yuv range enum + * + * @param string yuv range string + * @return yuv range enum + */ +static nvidia::gxf::Expected yuvRangeFromString( + const std::string& string) { + const auto it = std::find_if(std::cbegin(kYuvRangeToStr), + std::cend(kYuvRangeToStr), + [&string](const auto& v) { return v.second == string; }); + if (it != std::cend(kYuvRangeToStr)) { return it->first; } + + HOLOSCAN_LOG_ERROR("Unsupported yuv range '{}'", string); + return nvidia::gxf::Unexpected{GXF_FAILURE}; +} + +/** + * Convert a yuv range enum to a string + * + * @param yuv_range yuv range enum + * @return yuv range string + */ +static std::string yuvRangeToString(holoscan::ops::HolovizOp::YuvRange yuv_range) { + const auto it = std::find_if(std::cbegin(kYuvRangeToStr), + std::cend(kYuvRangeToStr), + [&yuv_range](const auto& v) { return v.first == yuv_range; }); + if (it != std::cend(kYuvRangeToStr)) { return it->second; } + + return "invalid"; +} + +/// table to convert chroma location enum to string +static const std::array, 2> + kChromaLoactionToStr{{{holoscan::ops::HolovizOp::ChromaLocation::COSITED_EVEN, "cosited_even"}, + {holoscan::ops::HolovizOp::ChromaLocation::MIDPOINT, "midpoint"}}}; + +/** + * Convert a string to a chroma location enum + * + * @param string chroma location string + * @return chroma location enum + */ +static nvidia::gxf::Expected chromaLocationFromString( + const std::string& string) { + const auto it = std::find_if(std::cbegin(kChromaLoactionToStr), + std::cend(kChromaLoactionToStr), + [&string](const auto& v) { return v.second == string; }); + if (it != std::cend(kChromaLoactionToStr)) { return it->first; } + + HOLOSCAN_LOG_ERROR("Unsupported chroma location '{}'", string); + return nvidia::gxf::Unexpected{GXF_FAILURE}; +} + +/** + * Convert a chroma location enum to a string + * + * 
@param chroma_location chroma location enum + * @return chroma location string + */ +static std::string chromaLocationToString( + holoscan::ops::HolovizOp::ChromaLocation chroma_location) { + const auto it = + std::find_if(std::cbegin(kChromaLoactionToStr), + std::cend(kChromaLoactionToStr), + [&chroma_location](const auto& v) { return v.first == chroma_location; }); + if (it != std::cend(kChromaLoactionToStr)) { return it->second; } + + return "invalid"; +} + /** * Try to detect the input type enum for given buffer properties. * @@ -300,6 +422,26 @@ struct YAML::convert { case holoscan::ops::HolovizOp::InputType::COLOR_LUT: case holoscan::ops::HolovizOp::InputType::DEPTH_MAP_COLOR: node["image_format"] = imageFormatToString(input_spec.image_format_); + switch (input_spec.image_format_) { + case holoscan::ops::HolovizOp::ImageFormat::Y8U8Y8V8_422_UNORM: + case holoscan::ops::HolovizOp::ImageFormat::U8Y8V8Y8_422_UNORM: + case holoscan::ops::HolovizOp::ImageFormat::Y8_U8V8_2PLANE_420_UNORM: + case holoscan::ops::HolovizOp::ImageFormat::Y8_U8V8_2PLANE_422_UNORM: + case holoscan::ops::HolovizOp::ImageFormat::Y8_U8_V8_3PLANE_420_UNORM: + case holoscan::ops::HolovizOp::ImageFormat::Y8_U8_V8_3PLANE_422_UNORM: + case holoscan::ops::HolovizOp::ImageFormat::Y16_U16V16_2PLANE_420_UNORM: + case holoscan::ops::HolovizOp::ImageFormat::Y16_U16V16_2PLANE_422_UNORM: + case holoscan::ops::HolovizOp::ImageFormat::Y16_U16_V16_3PLANE_420_UNORM: + case holoscan::ops::HolovizOp::ImageFormat::Y16_U16_V16_3PLANE_422_UNORM: + node["yuv_model_conversion"] = + yuvModelConversionToString(input_spec.yuv_model_conversion_); + node["yuv_range"] = yuvRangeToString(input_spec.yuv_range_); + node["x_chroma_location"] = chromaLocationToString(input_spec.x_chroma_location_); + node["y_chroma_location"] = chromaLocationToString(input_spec.y_chroma_location_); + break; + default: + break; + } break; case holoscan::ops::HolovizOp::InputType::POINTS: case holoscan::ops::HolovizOp::InputType::LINES: @@ 
-355,6 +497,31 @@ struct YAML::convert { imageFormatFromString(node["image_format"].as()); if (maybe_image_format) { input_spec.image_format_ = maybe_image_format.value(); } } + if (node["yuv_model_conversion"]) { + const auto maybe_yuv_model_conversion = + yuvModelConversionFromString(node["yuv_model_conversion"].as()); + if (maybe_yuv_model_conversion) { + input_spec.yuv_model_conversion_ = maybe_yuv_model_conversion.value(); + } + } + if (node["yuv_range"]) { + const auto maybe_yuv_range = yuvRangeFromString(node["yuv_range"].as()); + if (maybe_yuv_range) { input_spec.yuv_range_ = maybe_yuv_range.value(); } + } + if (node["x_chroma_location"]) { + const auto maybe_x_chroma_location = + chromaLocationFromString(node["x_chroma_location"].as()); + if (maybe_x_chroma_location) { + input_spec.x_chroma_location_ = maybe_x_chroma_location.value(); + } + } + if (node["y_chroma_location"]) { + const auto maybe_y_chroma_location = + chromaLocationFromString(node["y_chroma_location"].as()); + if (maybe_y_chroma_location) { + input_spec.y_chroma_location_ = maybe_y_chroma_location.value(); + } + } break; case holoscan::ops::HolovizOp::InputType::LINES: case holoscan::ops::HolovizOp::InputType::LINE_STRIP: @@ -804,6 +971,59 @@ void HolovizOp::render_color_image(const InputSpec& input_spec, BufferInfo& buff lut_.data()); } + switch (buffer_info.yuv_model_conversion) { + case YuvModelConversion::YUV_601: + viz::ImageYuvModelConversion(viz::YuvModelConversion::YUV_601); + break; + case YuvModelConversion::YUV_709: + viz::ImageYuvModelConversion(viz::YuvModelConversion::YUV_709); + break; + case YuvModelConversion::YUV_2020: + viz::ImageYuvModelConversion(viz::YuvModelConversion::YUV_2020); + break; + default: + throw std::runtime_error( + fmt::format("Unhandled yuv model conversion {}", + yuvModelConversionToString(buffer_info.yuv_model_conversion))); + } + + switch (buffer_info.yuv_range) { + case YuvRange::ITU_FULL: + viz::ImageYuvRange(viz::YuvRange::ITU_FULL); + break; + 
case YuvRange::ITU_NARROW: + viz::ImageYuvRange(viz::YuvRange::ITU_NARROW); + break; + default: + throw std::runtime_error( + fmt::format("Unhandled yuv range {}", yuvRangeToString(buffer_info.yuv_range))); + } + + viz::ChromaLocation x_chroma_location, y_chroma_location; + switch (input_spec.x_chroma_location_) { + case ChromaLocation::COSITED_EVEN: + x_chroma_location = viz::ChromaLocation::COSITED_EVEN; + break; + case ChromaLocation::MIDPOINT: + x_chroma_location = viz::ChromaLocation::MIDPOINT; + break; + default: + throw std::runtime_error(fmt::format("Unhandled x chroma location {}", + chromaLocationToString(input_spec.x_chroma_location_))); + } + switch (input_spec.y_chroma_location_) { + case ChromaLocation::COSITED_EVEN: + y_chroma_location = viz::ChromaLocation::COSITED_EVEN; + break; + case ChromaLocation::MIDPOINT: + y_chroma_location = viz::ChromaLocation::MIDPOINT; + break; + default: + throw std::runtime_error(fmt::format("Unhandled y chroma location {}", + chromaLocationToString(input_spec.y_chroma_location_))); + } + viz::ImageChromaLocation(x_chroma_location, y_chroma_location); + viz::ImageComponentMapping(buffer_info.component_swizzle[0], buffer_info.component_swizzle[1], buffer_info.component_swizzle[2], @@ -811,19 +1031,53 @@ void HolovizOp::render_color_image(const InputSpec& input_spec, BufferInfo& buff if (buffer_info.storage_type == nvidia::gxf::MemoryStorageType::kDevice) { // if it's the device convert to `CUDeviceptr` const auto cu_buffer_ptr = reinterpret_cast(buffer_info.buffer_ptr); + CUdeviceptr cu_buffer_ptr_plane_1 = 0; + size_t row_pitch_plane_1 = 0; + CUdeviceptr cu_buffer_ptr_plane_2 = 0; + size_t row_pitch_plane_2 = 0; + if (buffer_info.color_planes.size() >= 2) { + cu_buffer_ptr_plane_1 = cu_buffer_ptr + buffer_info.color_planes[1].offset; + row_pitch_plane_1 = buffer_info.color_planes[1].stride; + } + if (buffer_info.color_planes.size() >= 3) { + cu_buffer_ptr_plane_2 = cu_buffer_ptr + buffer_info.color_planes[2].offset; + 
row_pitch_plane_2 = buffer_info.color_planes[2].stride; + } viz::ImageCudaDevice(buffer_info.width, buffer_info.height, image_format, cu_buffer_ptr, - buffer_info.stride[0]); + buffer_info.stride[0], + cu_buffer_ptr_plane_1, + row_pitch_plane_1, + cu_buffer_ptr_plane_2, + row_pitch_plane_2); } else { // convert to void * if using the system/host const auto host_buffer_ptr = reinterpret_cast(buffer_info.buffer_ptr); + const void* host_buffer_ptr_plane_1 = nullptr; + size_t row_pitch_plane_1 = 0; + const void* host_buffer_ptr_plane_2 = nullptr; + size_t row_pitch_plane_2 = 0; + if (buffer_info.color_planes.size() >= 2) { + host_buffer_ptr_plane_1 = reinterpret_cast(uintptr_t(host_buffer_ptr) + + buffer_info.color_planes[1].offset); + row_pitch_plane_1 = buffer_info.color_planes[1].stride; + } + if (buffer_info.color_planes.size() >= 3) { + host_buffer_ptr_plane_2 = reinterpret_cast(uintptr_t(host_buffer_ptr) + + buffer_info.color_planes[2].offset); + row_pitch_plane_2 = buffer_info.color_planes[2].stride; + } viz::ImageHost(buffer_info.width, buffer_info.height, image_format, host_buffer_ptr, - buffer_info.stride[0]); + buffer_info.stride[0], + host_buffer_ptr_plane_1, + row_pitch_plane_1, + host_buffer_ptr_plane_2, + row_pitch_plane_2); } viz::EndLayer(); } @@ -844,13 +1098,14 @@ void HolovizOp::render_geometry(const ExecutionContext& context, const InputSpec host_buffer.resize(buffer_info.bytes_size); // copy from device to host - CUDA_TRY(cudaMemcpyAsync(static_cast(host_buffer.data()), - static_cast(buffer_info.buffer_ptr), - buffer_info.bytes_size, - cudaMemcpyDeviceToHost, - cuda_stream_handler_.get_cuda_stream(context.context()))); + HOLOSCAN_CUDA_CALL(cudaMemcpyAsync(static_cast(host_buffer.data()), + static_cast(buffer_info.buffer_ptr), + buffer_info.bytes_size, + cudaMemcpyDeviceToHost, + cuda_stream_handler_.get_cuda_stream(context.context()))); // wait for the CUDA memory copy to finish - 
CUDA_TRY(cudaStreamSynchronize(cuda_stream_handler_.get_cuda_stream(context.context()))); + HOLOSCAN_CUDA_CALL( + cudaStreamSynchronize(cuda_stream_handler_.get_cuda_stream(context.context()))); buffer_info.buffer_ptr = host_buffer.data(); } @@ -987,9 +1242,8 @@ void HolovizOp::render_geometry(const ExecutionContext& context, const InputSpec // oval primitive, an oval primitive is defined by the center coordinate and the axis // sizes (xi, yi, sxi, syi) if ((buffer_info.components < 2) || (buffer_info.components > 4)) { - throw std::runtime_error( - fmt::format("Expected two, three or four values per oval, but got '{}'", - buffer_info.components)); + throw std::runtime_error(fmt::format( + "Expected two, three or four values per oval, but got '{}'", buffer_info.components)); } topology = viz::PrimitiveTopology::OVAL_LIST; primitive_count = coordinates; @@ -1209,9 +1463,6 @@ void HolovizOp::start() { // make the instance current ScopedPushInstance scoped_instance(instance_); - if (framebuffer_srgb_) { - viz::SetSurfaceFormat({viz::ImageFormat::B8G8R8A8_SRGB, viz::ColorSpace::SRGB_NONLINEAR}); - } if (vsync_) { viz::SetPresentMode(viz::PresentMode::FIFO); } // initialize Holoviz @@ -1233,6 +1484,32 @@ void HolovizOp::start() { display_name_.get().empty() ? nullptr : display_name_.get().c_str()); } + if (framebuffer_srgb_) { + // If the SRGB framebuffer is enabled, get the supported surface formats and look for a + // sRGB format. + uint32_t surface_format_count = 0; + viz::GetSurfaceFormats(&surface_format_count, nullptr); + std::vector surface_formats(surface_format_count); + viz::GetSurfaceFormats(&surface_format_count, surface_formats.data()); + + bool found = false; + for (auto surface_format_it = surface_formats.begin(); + surface_format_it != surface_formats.end(); + ++surface_format_it) { + // Ignore the color space, it might be `SRGB_NONLINEAR` if a display is connected or + // `PASS_THROUGH` in headless mode. 
+ if ((surface_format_it->image_format_ == viz::ImageFormat::R8G8B8A8_SRGB) || + (surface_format_it->image_format_ == viz::ImageFormat::B8G8R8A8_SRGB) || + (surface_format_it->image_format_ == viz::ImageFormat::A8B8G8R8_SRGB_PACK32)) { + viz::SetSurfaceFormat(*surface_format_it); + found = true; + break; + } + } + + if (!found) { throw std::runtime_error("No sRGB framebuffer format found."); } + } + // initialize the camera with the provided parameters camera_eye_cur_ = camera_eye_.get(); camera_look_at_cur_ = camera_look_at_.get(); diff --git a/src/operators/ping_tensor_rx/CMakeLists.txt b/src/operators/ping_tensor_rx/CMakeLists.txt new file mode 100644 index 0000000..920517b --- /dev/null +++ b/src/operators/ping_tensor_rx/CMakeLists.txt @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +add_holoscan_operator(ping_tensor_rx ping_tensor_rx.cpp) + +target_link_libraries(op_ping_tensor_rx + PUBLIC + holoscan::core +) diff --git a/tests/system/ping_tensor_rx_op.cpp b/src/operators/ping_tensor_rx/ping_tensor_rx.cpp similarity index 50% rename from tests/system/ping_tensor_rx_op.cpp rename to src/operators/ping_tensor_rx/ping_tensor_rx.cpp index 290bdda..0eccc2f 100644 --- a/tests/system/ping_tensor_rx_op.cpp +++ b/src/operators/ping_tensor_rx/ping_tensor_rx.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,37 +15,37 @@ * limitations under the License. */ -#include "ping_tensor_rx_op.hpp" +#include "holoscan/operators/ping_tensor_rx/ping_tensor_rx.hpp" #include #include -namespace holoscan { -namespace ops { +namespace holoscan::ops { void PingTensorRxOp::setup(OperatorSpec& spec) { spec.input("in"); - - spec.param(tensor_name_, - "tensor_name", - "output tensor name", - "output tensor name (default: tensor)", - std::string{"tensor"}); } void PingTensorRxOp::compute(InputContext& op_input, OutputContext&, ExecutionContext&) { - auto value = op_input.receive("in").value(); - auto& tensor = value[tensor_name_.get()]; - if (tensor->data() == nullptr) { - HOLOSCAN_LOG_ERROR("Received tensor with null data"); + auto maybe_in_message = op_input.receive("in"); + if (!maybe_in_message) { + HOLOSCAN_LOG_ERROR("Failed to receive message from port 'in'"); return; } - uint8_t data = 0; - cudaMemcpy(&data, tensor->data(), 1, cudaMemcpyDeviceToHost); - HOLOSCAN_LOG_INFO( - "Rx message value - name:{}, data[0]:{}, nbytes:{}", name(), data, tensor->nbytes()); + auto in_message = maybe_in_message.value(); + // Loop over any tensors found, printing their names and shapes. 
+ for (auto& [key, tensor] : in_message) { + if (tensor->data() == nullptr) { + HOLOSCAN_LOG_ERROR("Received tensor named '{}' with null data", key); + continue; + } + HOLOSCAN_LOG_INFO("{} received message {}: Tensor key: '{}', shape: ({})", + name(), + count_++, + key, + fmt::join(tensor->shape(), ", ")); + } } -} // namespace ops -} // namespace holoscan +} // namespace holoscan::ops diff --git a/src/operators/ping_tensor_tx/CMakeLists.txt b/src/operators/ping_tensor_tx/CMakeLists.txt new file mode 100644 index 0000000..3939bb3 --- /dev/null +++ b/src/operators/ping_tensor_tx/CMakeLists.txt @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_holoscan_operator(ping_tensor_tx ping_tensor_tx.cpp) + +target_link_libraries(op_ping_tensor_tx + PUBLIC + holoscan::core +) diff --git a/examples/ping_distributed/cpp/ping_distributed_ops.cpp b/src/operators/ping_tensor_tx/ping_tensor_tx.cpp similarity index 84% rename from examples/ping_distributed/cpp/ping_distributed_ops.cpp rename to src/operators/ping_tensor_tx/ping_tensor_tx.cpp index b6276a7..a96d413 100644 --- a/examples/ping_distributed/cpp/ping_distributed_ops.cpp +++ b/src/operators/ping_tensor_tx/ping_tensor_tx.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,18 +14,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "ping_distributed_ops.hpp" -#include +#include "holoscan/operators/ping_tensor_tx/ping_tensor_tx.hpp" + +#include + #include #include #include #include -#include - -#include "gxf/std/allocator.hpp" -#include "gxf/std/tensor.hpp" +#include +#include "holoscan/utils/cuda_macros.hpp" namespace holoscan::ops { @@ -48,11 +48,11 @@ void PingTensorTxOp::setup(OperatorSpec& spec) { spec.output("out"); spec.param(allocator_, "allocator", "Allocator", "Allocator used to allocate tensor output."); - spec.param(tensor_on_gpu_, - "tensor_on_gpu", - "Tensor on GPU", - "Flag indicating that the tensor transmitted should be on the device.", - true); + spec.param(storage_type_, + "storage_type", + "memory storage type", + "nvidia::gxf::MemoryStorageType enum indicating where the memory will be stored", + std::string("system")); spec.param(batch_size_, "batch_size", "batch size", @@ -86,8 +86,8 @@ void PingTensorTxOp::setup(OperatorSpec& spec) { spec.param(tensor_name_, "tensor_name", "output tensor name", - "output tensor name (default: out)", - std::string{"out"}); + "output tensor name (default: tensor)", + std::string{"tensor"}); } nvidia::gxf::PrimitiveType PingTensorTxOp::primitive_type(const std::string& data_type) { @@ -125,7 +125,6 @@ void PingTensorTxOp::compute(InputContext&, OutputContext& op_output, ExecutionC TensorMap out_message; auto gxf_context = context.context(); - auto frag = fragment(); // get Handle to underlying nvidia::gxf::Allocator from std::shared_ptr auto allocator = @@ -150,10 +149,18 @@ void PingTensorTxOp::compute(InputContext&, OutputContext& op_output, ExecutionC const uint64_t bytes_per_element = 
nvidia::gxf::PrimitiveTypeSize(dtype); auto strides = nvidia::gxf::ComputeTrivialStrides(tensor_shape, bytes_per_element); nvidia::gxf::MemoryStorageType storage_type; - if (tensor_on_gpu_) { + auto storage_name = storage_type_.get(); + HOLOSCAN_LOG_DEBUG("storage_name = {}", storage_name); + if (storage_name == std::string("device")) { storage_type = nvidia::gxf::MemoryStorageType::kDevice; - } else { + } else if (storage_name == std::string("host")) { + storage_type = nvidia::gxf::MemoryStorageType::kHost; + } else if (storage_name == std::string("system")) { storage_type = nvidia::gxf::MemoryStorageType::kSystem; + } else { + throw std::runtime_error(fmt::format( + "Unrecognized storage_type ({}), should be one of {{'device', 'host', 'system'}}", + storage_name)); + } // allocate a tensor of the specified shape and data type @@ -177,18 +184,4 @@ void PingTensorTxOp::compute(InputContext&, OutputContext& op_output, ExecutionC HOLOSCAN_LOG_INFO("Sent message {}", count_++); } -void PingTensorRxOp::setup(OperatorSpec& spec) { - spec.input("in"); -} - -void PingTensorRxOp::compute(InputContext& op_input, OutputContext&, ExecutionContext&) { - auto in_message = op_input.receive("in").value(); - TensorMap out_message; - for (auto& [key, tensor] : in_message) { // Process with 'tensor' here. 
- HOLOSCAN_LOG_INFO("Received message {}: Tensor key: '{}', shape: ({})", - count_++, - key, - fmt::join(tensor->shape(), ", ")); - } -} } // namespace holoscan::ops diff --git a/src/operators/v4l2_video_capture/v4l2_video_capture.cpp b/src/operators/v4l2_video_capture/v4l2_video_capture.cpp index 2c2e519..e98e667 100644 --- a/src/operators/v4l2_video_capture/v4l2_video_capture.cpp +++ b/src/operators/v4l2_video_capture/v4l2_video_capture.cpp @@ -36,7 +36,7 @@ #define CLEAR(x) memset(&(x), 0, sizeof(x)) -bool pixel_format_supported(int fd, unsigned int pixel_format_fourcc) { +static bool pixel_format_supported(int fd, unsigned int pixel_format_fourcc) { struct v4l2_fmtdesc fmtdesc; CLEAR(fmtdesc); fmtdesc.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; @@ -51,6 +51,102 @@ bool pixel_format_supported(int fd, unsigned int pixel_format_fourcc) { return supported_format; } +static void YUYVToRGBA(const void* yuyv, void* rgba, size_t width, size_t height) { + auto r_convert = [](int y, int cr) { + double r = y + (1.4065 * (cr - 128)); + return static_cast(std::max(0, std::min(255, static_cast(r)))); + }; + auto g_convert = [](int y, int cb, int cr) { + double g = y - (0.3455 * (cb - 128)) - (0.7169 * (cr - 128)); + return static_cast(std::max(0, std::min(255, static_cast(g)))); + }; + auto b_convert = [](int y, int cb) { + double b = y + (1.7790 * (cb - 128)); + return static_cast(std::max(0, std::min(255, static_cast(b)))); + }; + + const unsigned char* yuyv_buf = static_cast(yuyv); + unsigned char* rgba_buf = static_cast(rgba); + + for (unsigned int i = 0, j = 0; i < width * height * 4; i += 8, j += 4) { + int cb = yuyv_buf[j + 1]; + int cr = yuyv_buf[j + 3]; + + // First pixel + int y = yuyv_buf[j]; + rgba_buf[i] = r_convert(y, cr); + rgba_buf[i + 1] = g_convert(y, cb, cr); + rgba_buf[i + 2] = b_convert(y, cb); + rgba_buf[i + 3] = 255; + + // Second pixel + y = yuyv_buf[j + 2]; + rgba_buf[i + 4] = r_convert(y, cr); + rgba_buf[i + 5] = g_convert(y, cb, cr); + rgba_buf[i + 6] = 
b_convert(y, cb); + rgba_buf[i + 7] = 255; + } +} + +// Support for RGB24 format (`RGB3` in 4CC code) +// For every pixel, add alpha channel to get RGBA +static void RGB24ToRGBA(const void* rgb3, void* rgba, size_t width, size_t height) { + const unsigned char* rgb3_buf = static_cast(rgb3); + unsigned char* rgba_buf = static_cast(rgba); + + for (unsigned int i = 0, j = 0; i < width * height * 3; i += 3, j += 4) { + rgba_buf[j] = rgb3_buf[i]; + rgba_buf[j + 1] = rgb3_buf[i + 1]; + rgba_buf[j + 2] = rgb3_buf[i + 2]; + rgba_buf[j + 3] = 255; + } +} + +// Support for MJPEG format +// Each frame is a JPEG image so use libjpeg to decompress the image and modify it to +// add alpha channel +static void MJPEGToRGBA(const void* mjpg, void* rgba, size_t width, size_t height) { + struct jpeg_decompress_struct cinfo; + struct jpeg_error_mgr jerr; + // Size of image is width * height * 3 (RGB) + unsigned long jpg_size = width * height * 3; + int row_stride; + + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_decompress(&cinfo); + + const unsigned char* src_buf = + const_cast(static_cast(mjpg)); + unsigned char* dest_buf = static_cast(rgba); + jpeg_mem_src(&cinfo, src_buf, jpg_size); + int rc = jpeg_read_header(&cinfo, TRUE); + + if (rc != 1) { throw std::runtime_error("Failed to read jpeg header"); } + + jpeg_start_decompress(&cinfo); + + // Each row has width * 4 pixels (RGBA) + row_stride = width * 4; + + while (cinfo.output_scanline < cinfo.output_height) { + unsigned char* buffer_array[1]; + buffer_array[0] = dest_buf + (cinfo.output_scanline) * row_stride; + // Decompress jpeg image and write it to buffer_arary + jpeg_read_scanlines(&cinfo, buffer_array, 1); + unsigned char* buf = buffer_array[0]; + // Modify image to add alpha channel with values set to 255 + // start from the end so we don't overwrite existing values + for (int i = (int)width * 3 - 1, j = row_stride - 1; i > 0; i -= 3, j -= 4) { + buf[j] = 255; + buf[j - 1] = buf[i]; + buf[j - 2] = buf[i - 1]; + buf[j - 
3] = buf[i - 2]; + } + } + jpeg_finish_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); +} + namespace holoscan::ops { void V4L2VideoCaptureOp::setup(OperatorSpec& spec) { @@ -60,6 +156,7 @@ void V4L2VideoCaptureOp::setup(OperatorSpec& spec) { static constexpr char kDefaultDevice[] = "/dev/video0"; static constexpr char kDefaultPixelFormat[] = "auto"; + static constexpr bool kDefaultPassThrough = false; static constexpr uint32_t kDefaultWidth = 0; static constexpr uint32_t kDefaultHeight = 0; static constexpr uint32_t kDefaultNumBuffers = 4; @@ -80,6 +177,12 @@ void V4L2VideoCaptureOp::setup(OperatorSpec& spec) { "Pixel Format", "Pixel format of capture stream (little endian four character code (fourcc))", std::string(kDefaultPixelFormat)); + spec.param(pass_through_, + "pass_through", + "Pass Through", + "If set, pass through the input buffer to the output unmodified, else convert to " + "RGBA32", + kDefaultPassThrough); spec.param(exposure_time_, "exposure_time", "Exposure Time", @@ -123,35 +226,27 @@ void V4L2VideoCaptureOp::compute(InputContext& op_input, OutputContext& op_outpu auto video_buffer = out_message.value().add(); if (!video_buffer) { throw std::runtime_error("Failed to allocate video buffer; terminating."); } - // Get Handle to underlying nvidia::gxf::Allocator from std::shared_ptr - auto allocator = - nvidia::gxf::Handle::Create(context.context(), allocator_->gxf_cid()); - // Allocate output buffer - video_buffer.value()->resize( - width_use_, - height_use_, - nvidia::gxf::SurfaceLayout::GXF_SURFACE_LAYOUT_PITCH_LINEAR, - nvidia::gxf::MemoryStorageType::kHost, - allocator.value(), - false); - if (!video_buffer.value()->pointer()) { - throw std::runtime_error("Failed to allocate output buffer."); - } - - // Wrap buffer Buffer& read_buf = buffers_[buf.index]; - if (pixel_format_use_ == V4L2_PIX_FMT_YUYV) { - // Convert YUYV to RGBA output buffer - YUYVToRGBA(read_buf.ptr, video_buffer.value()->pointer(), width_use_, height_use_); - // Return 
(queue) the buffer. - if (ioctl(fd_, VIDIOC_QBUF, &buf) < 0) { - throw std::runtime_error( - fmt::format("Failed to queue buffer {} on {}", buf.index, device_.get().c_str())); + if (converter_) { + // Convert to RGBA output buffer + + // Get Handle to underlying nvidia::gxf::Allocator from std::shared_ptr + auto allocator = nvidia::gxf::Handle::Create(context.context(), + allocator_->gxf_cid()); + // Allocate output buffer + video_buffer.value()->resize( + width_use_, + height_use_, + nvidia::gxf::SurfaceLayout::GXF_SURFACE_LAYOUT_PITCH_LINEAR, + nvidia::gxf::MemoryStorageType::kHost, + allocator.value(), + false); + if (!video_buffer.value()->pointer()) { + throw std::runtime_error("Failed to allocate output buffer."); } - } else if (pixel_format_use_ == V4L2_PIX_FMT_MJPEG) { - // Convert MJPG to RGBA output buffer - MJPEGToRGBA(read_buf.ptr, video_buffer.value()->pointer(), width_use_, height_use_); + + (*converter_)(read_buf.ptr, video_buffer.value()->pointer(), width_use_, height_use_); // Return (queue) the buffer. 
if (ioctl(fd_, VIDIOC_QBUF, &buf) < 0) { @@ -160,8 +255,30 @@ void V4L2VideoCaptureOp::compute(InputContext& op_input, OutputContext& op_outpu } } else { // Wrap memory into output buffer + nvidia::gxf::VideoBufferInfo video_buffer_info{}; + video_buffer_info.width = width_use_; + video_buffer_info.height = height_use_; + video_buffer_info.surface_layout = nvidia::gxf::SurfaceLayout::GXF_SURFACE_LAYOUT_PITCH_LINEAR; + auto& color_plane = video_buffer_info.color_planes.emplace_back(); + color_plane = nvidia::gxf::ColorPlane{}; + color_plane.width = width_use_; + color_plane.height = height_use_; + color_plane.size = buf.length; + + if (pixel_format_use_ == V4L2_PIX_FMT_RGBA32) { + video_buffer_info.color_format = nvidia::gxf::VideoFormat::GXF_VIDEO_FORMAT_RGBA; + video_buffer_info.color_planes[0].bytes_per_pixel = 1; + } else if (pixel_format_use_ == V4L2_PIX_FMT_RGB24) { + video_buffer_info.color_format = nvidia::gxf::VideoFormat::GXF_VIDEO_FORMAT_RGB; + video_buffer_info.color_planes[0].bytes_per_pixel = 1; + } else { + // If there is no GXF VideoFormat for the V4L pixel format, set it to custom. In this case + // the downstream operator needs to be configured to expect the correct format. 
+ video_buffer_info.color_format = nvidia::gxf::VideoFormat::GXF_VIDEO_FORMAT_CUSTOM; + } + video_buffer.value()->wrapMemory( - video_buffer.value()->video_frame_info(), + video_buffer_info, buf.length, nvidia::gxf::MemoryStorageType::kHost, read_buf.ptr, @@ -282,22 +399,35 @@ void V4L2VideoCaptureOp::v4l2_check_formats() { // Update format with valid user-given format pixel_format_use_ = pixel_format; } else if (pixel_format_.get() == "auto") { - // Currently, AB24, YUYV, and MJPG are supported in auto mode - uint32_t ab24 = v4l2_fourcc('A', 'B', '2', '4'); - uint32_t yuyv = v4l2_fourcc('Y', 'U', 'Y', 'V'); - uint32_t mjpg = v4l2_fourcc('M', 'J', 'P', 'G'); + // Currently, AB24, YUYV, MJPG, and RGB3 are supported in auto mode + uint32_t ab24 = v4l2_fourcc('A', 'B', '2', '4'); // V4L2_PIX_FMT_RGBA32 + uint32_t yuyv = v4l2_fourcc('Y', 'U', 'Y', 'V'); // V4L2_PIX_FMT_YUYV + uint32_t mjpg = v4l2_fourcc('M', 'J', 'P', 'G'); // V4L2_PIX_FMT_MJPEG + uint32_t rgb3 = v4l2_fourcc('R', 'G', 'B', '3'); // V4L2_PIX_FMT_RGB24 if (pixel_format_supported(fd_, ab24)) { pixel_format_use_ = ab24; } else if (pixel_format_supported(fd_, yuyv)) { pixel_format_use_ = yuyv; + } else if (pixel_format_supported(fd_, rgb3)) { + pixel_format_use_ = rgb3; } else if (pixel_format_supported(fd_, mjpg)) { pixel_format_use_ = mjpg; } else { throw std::runtime_error( - "Automatic setting of pixel format failed: device does not support AB24, YUYV, or MJPG. " - " If you are sure that the device pixel format is RGBA, please specify the pixel format " - "in the yaml configuration file."); + "Automatic setting of pixel format failed: device does not support AB24, YUYV, MJPG, or " + "RGB3. 
If you are sure that the device pixel format is RGBA, please specify the pixel " + "format in the yaml configuration file."); + } + } + + if (!pass_through_) { + if (pixel_format_use_ == V4L2_PIX_FMT_YUYV) { + converter_ = &YUYVToRGBA; + } else if (pixel_format_use_ == V4L2_PIX_FMT_RGB24) { + converter_ = &RGB24ToRGBA; + } else if (pixel_format_use_ == V4L2_PIX_FMT_MJPEG) { + converter_ = &MJPEGToRGBA; } } @@ -308,6 +438,7 @@ void V4L2VideoCaptureOp::v4l2_check_formats() { frmsize.pixel_format = pixel_format_use_; int supported_formats = 0; while (ioctl(fd_, VIDIOC_ENUM_FRAMESIZES, &frmsize) == 0) { + supported_formats = 0; if (frmsize.type != V4L2_FRMSIZE_TYPE_DISCRETE) { throw std::runtime_error("Non-discrete frame sizes not supported"); } @@ -321,8 +452,16 @@ void V4L2VideoCaptureOp::v4l2_check_formats() { fmt::format("Device does not support '{}x{}'", width_.get(), height_.get())); } // Update format with valid user-given format - if (width_.get() > 0) width_use_ = width_.get(); - if (height_.get() > 0) height_use_ = height_.get(); + if (width_.get() > 0) { + width_use_ = width_.get(); + } else { + width_use_ = frmsize.discrete.width; + } + if (height_.get() > 0) { + height_use_ = height_.get(); + } else { + height_use_ = frmsize.discrete.height; + } } } @@ -517,86 +656,4 @@ void V4L2VideoCaptureOp::v4l2_read_buffer(v4l2_buffer& buf) { } } -void V4L2VideoCaptureOp::YUYVToRGBA(const void* yuyv, void* rgba, size_t width, size_t height) { - auto r_convert = [](int y, int cr) { - double r = y + (1.4065 * (cr - 128)); - return static_cast(std::max(0, std::min(255, static_cast(r)))); - }; - auto g_convert = [](int y, int cb, int cr) { - double g = y - (0.3455 * (cb - 128)) - (0.7169 * (cr - 128)); - return static_cast(std::max(0, std::min(255, static_cast(g)))); - }; - auto b_convert = [](int y, int cb) { - double b = y + (1.7790 * (cb - 128)); - return static_cast(std::max(0, std::min(255, static_cast(b)))); - }; - - const unsigned char* yuyv_buf = 
static_cast(yuyv); - unsigned char* rgba_buf = static_cast(rgba); - - for (unsigned int i = 0, j = 0; i < width * height * 4; i += 8, j += 4) { - int cb = yuyv_buf[j + 1]; - int cr = yuyv_buf[j + 3]; - - // First pixel - int y = yuyv_buf[j]; - rgba_buf[i] = r_convert(y, cr); - rgba_buf[i + 1] = g_convert(y, cb, cr); - rgba_buf[i + 2] = b_convert(y, cb); - rgba_buf[i + 3] = 255; - - // Second pixel - y = yuyv_buf[j + 2]; - rgba_buf[i + 4] = r_convert(y, cr); - rgba_buf[i + 5] = g_convert(y, cb, cr); - rgba_buf[i + 6] = b_convert(y, cb); - rgba_buf[i + 7] = 255; - } -} - -// Support for MJPEG format -// Each frame is a JPEG image so use libjpeg to decompress the image and modify it to -// add alpha channel -void V4L2VideoCaptureOp::MJPEGToRGBA(const void* mjpg, void* rgba, size_t width, size_t height) { - struct jpeg_decompress_struct cinfo; - struct jpeg_error_mgr jerr; - // Size of image is width * height * 3 (RGB) - unsigned long jpg_size = width * height * 3; - int row_stride; - - cinfo.err = jpeg_std_error(&jerr); - jpeg_create_decompress(&cinfo); - - const unsigned char* src_buf = - const_cast(static_cast(mjpg)); - unsigned char* dest_buf = static_cast(rgba); - jpeg_mem_src(&cinfo, src_buf, jpg_size); - int rc = jpeg_read_header(&cinfo, TRUE); - - if (rc != 1) { throw std::runtime_error("Failed to read jpeg header"); } - - jpeg_start_decompress(&cinfo); - - // Each row has width * 4 pixels (RGBA) - row_stride = width * 4; - - while (cinfo.output_scanline < cinfo.output_height) { - unsigned char* buffer_array[1]; - buffer_array[0] = dest_buf + (cinfo.output_scanline) * row_stride; - // Decompress jpeg image and write it to buffer_arary - jpeg_read_scanlines(&cinfo, buffer_array, 1); - unsigned char* buf = buffer_array[0]; - // Modify image to add alpha channel with values set to 255 - // start from the end so we don't overwrite existing values - for (int i = (int)width * 3 - 1, j = row_stride - 1; i > 0; i -= 3, j -= 4) { - buf[j] = 255; - buf[j - 1] = buf[i]; - 
buf[j - 2] = buf[i - 1]; - buf[j - 3] = buf[i - 2]; - } - } - jpeg_finish_decompress(&cinfo); - jpeg_destroy_decompress(&cinfo); -} - } // namespace holoscan::ops diff --git a/src/operators/video_stream_replayer/video_stream_replayer.cpp b/src/operators/video_stream_replayer/video_stream_replayer.cpp index 48639d6..a451292 100644 --- a/src/operators/video_stream_replayer/video_stream_replayer.cpp +++ b/src/operators/video_stream_replayer/video_stream_replayer.cpp @@ -19,9 +19,11 @@ #include #include +#include #include #include #include +#include #include "gxf/core/expected.hpp" #include "gxf/serialization/entity_serializer.hpp" @@ -32,6 +34,8 @@ #include "holoscan/core/fragment.hpp" #include "holoscan/core/gxf/entity.hpp" #include "holoscan/core/operator_spec.hpp" +#include "holoscan/core/resources/gxf/allocator.hpp" +#include "holoscan/core/resources/gxf/std_component_serializer.hpp" #include "holoscan/core/resources/gxf/std_entity_serializer.hpp" namespace holoscan::ops { @@ -44,7 +48,11 @@ void VideoStreamReplayerOp::setup(OperatorSpec& spec) { "Entity transmitter", "Transmitter channel for replaying entities", &output); - + spec.param(allocator_, + "allocator", + "Tensor Memory Allocator", + "Tensor memory allocator for tensor components, only used if an entity_serializer is " + "not provided. 
The default allocator is a holoscan::gxf::UnboundedAllocator."); spec.param(entity_serializer_, "entity_serializer", "Entity serializer", @@ -91,13 +99,49 @@ void VideoStreamReplayerOp::setup(OperatorSpec& spec) { void VideoStreamReplayerOp::initialize() { // Set up prerequisite parameters before calling Operator::initialize() auto frag = fragment(); - auto entity_serializer = - frag->make_resource("replayer__std_entity_serializer"); - if (graph_entity_) { - entity_serializer->gxf_eid(graph_entity_->eid()); - entity_serializer->gxf_graph_entity(graph_entity_); + + // Find if there is an argument for 'boolean_scheduling_term' + auto has_entity_serializer = std::find_if(args().begin(), args().end(), [](const auto& arg) { + return (arg.name() == "entity_serializer"); + }); + if (has_entity_serializer == args().end()) { + HOLOSCAN_LOG_TRACE( + "VideoStreamReplayerOp: entity_serializer argument not found, using default"); + + auto component_serializer = frag->make_resource( + "replayer__holoscan_component_serializer"); + + auto has_allocator = std::find_if( + args().begin(), args().end(), [](const auto& arg) { return (arg.name() == "allocator"); }); + if (has_allocator == args().end()) { + HOLOSCAN_LOG_TRACE("VideoStreamReplayerOp: allocator argument not found, using default"); + auto allocator = frag->make_resource("replayer__allocator"); + component_serializer->add_arg(Arg("allocator", allocator)); + } else { + HOLOSCAN_LOG_TRACE("VideoStreamReplayerOp: allocator found"); + auto& allocator_arg = *has_allocator; + component_serializer->add_arg(allocator_arg); + if (graph_entity_) { + auto allocator = std::any_cast>(allocator_arg.value()); + allocator->gxf_eid(graph_entity_->eid()); + allocator->gxf_graph_entity(graph_entity_); + } + } + + // Create new StdEntitySerializer with the provided allocator + auto entity_serializer = frag->make_resource( + "replayer__std_entity_serializer", + Arg("component_serializers", std::vector>{component_serializer})); + if 
(graph_entity_) { + component_serializer->gxf_eid(graph_entity_->eid()); + component_serializer->gxf_graph_entity(graph_entity_); + entity_serializer->gxf_eid(graph_entity_->eid()); + entity_serializer->gxf_graph_entity(graph_entity_); + } + add_arg(Arg("entity_serializer") = entity_serializer); + } else { + HOLOSCAN_LOG_TRACE("VideoStreamReplayerOp: entity_serializer argument found"); } - add_arg(Arg("entity_serializer") = entity_serializer); // Find if there is an argument for 'boolean_scheduling_term' auto has_boolean_scheduling_term = @@ -107,7 +151,7 @@ void VideoStreamReplayerOp::initialize() { // Create the BooleanCondition if there is no argument provided. if (has_boolean_scheduling_term == args().end()) { boolean_scheduling_term_ = - frag->make_condition("boolean_scheduling_term"); + frag->make_condition("replayer__boolean_scheduling_term"); add_arg(boolean_scheduling_term_.get()); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 526d7cf..f417118 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -112,6 +112,8 @@ target_link_libraries(OPERATORS_CLASSES_TEST holoscan::ops::inference_processor holoscan::ops::ping_rx holoscan::ops::ping_tx + holoscan::ops::ping_tensor_rx + holoscan::ops::ping_tensor_tx holoscan::ops::segmentation_postprocessor holoscan::ops::v4l2 holoscan::ops::video_stream_recorder @@ -145,10 +147,9 @@ ConfigureTest( system/native_operator_ping_app.cpp system/operator_metadata_apps.cpp system/ping_rx_op.cpp - system/ping_tensor_rx_op.cpp - system/ping_tensor_tx_op.cpp system/ping_tx_op.cpp system/tensor_compare_op.cpp + system/video_stream_replayer_op.cpp ) target_link_libraries(SYSTEM_TEST PRIVATE @@ -160,6 +161,9 @@ target_link_libraries(SYSTEM_TEST holoscan::ops::holoviz holoscan::ops::ping_rx holoscan::ops::ping_tx + holoscan::ops::ping_tensor_rx + holoscan::ops::ping_tensor_tx + holoscan::ops::video_stream_replayer ) ConfigureTest( @@ -172,13 +176,13 @@ ConfigureTest( 
system/distributed/standalone_fragments.cpp system/distributed/ucx_message_serialization_ping_app.cpp system/env_wrapper.cpp - system/ping_tensor_rx_op.cpp - system/ping_tensor_tx_op.cpp ) target_link_libraries(SYSTEM_DISTRIBUTED_TEST PRIVATE holoscan::ops::bayer_demosaic holoscan::ops::holoviz + holoscan::ops::ping_tensor_rx + holoscan::ops::ping_tensor_tx ) # set environment variables used by distributed applications in the tests @@ -215,13 +219,13 @@ ConfigureTest( system/distributed/holoscan_ucx_ports_env.cpp system/distributed/standalone_fragments.cpp system/env_wrapper.cpp - system/ping_tensor_rx_op.cpp - system/ping_tensor_tx_op.cpp ) target_link_libraries(SYSTEM_DISTRIBUTED_EBS_TEST PRIVATE holoscan::ops::bayer_demosaic holoscan::ops::holoviz + holoscan::ops::ping_tensor_rx + holoscan::ops::ping_tensor_tx ) set(CMAKE_SYSTEM_DISTRIBUTED_EBS_TEST_FLAGS diff --git a/tests/core/app_driver.cpp b/tests/core/app_driver.cpp index 2992f59..8c2bec3 100644 --- a/tests/core/app_driver.cpp +++ b/tests/core/app_driver.cpp @@ -53,8 +53,11 @@ TEST(AppDriver, TestSetUcxToExcludeCudaIpc) { std::string log_output = testing::internal::GetCapturedStderr(); env_var = std::getenv("UCX_TLS"); EXPECT_EQ(std::string{env_var}, std::string(new_env_var)); - EXPECT_TRUE(log_output.find("warn") != std::string::npos); - EXPECT_TRUE(log_output.find("UCX_TLS is set") != std::string::npos); + EXPECT_TRUE(log_output.find("warn") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("UCX_TLS is set") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // restore the original environment variable if (env_orig) { @@ -99,10 +102,15 @@ TEST(AppDriver, TestExcludeCudaIpcTransportOnIgpu) { env_var = std::getenv("UCX_TLS"); EXPECT_EQ(std::string{env_var}, std::string(new_env_var)); if (is_integrated) { - EXPECT_TRUE(log_output.find("warn") != std::string::npos); - EXPECT_TRUE(log_output.find("UCX_TLS is set") != 
std::string::npos); + EXPECT_TRUE(log_output.find("warn") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("UCX_TLS is set") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } else { - EXPECT_TRUE(log_output.find("UCX_TLS is set") == std::string::npos); + EXPECT_TRUE(log_output.find("UCX_TLS is set") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } // restore the original environment variable diff --git a/tests/core/application.cpp b/tests/core/application.cpp index 3f94936..a3a4f93 100644 --- a/tests/core/application.cpp +++ b/tests/core/application.cpp @@ -111,13 +111,25 @@ TEST(Application, TestAppPrintOptions) { std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("run_driver: true") != std::string::npos); - EXPECT_TRUE(log_output.find("run_worker: true") != std::string::npos); - EXPECT_TRUE(log_output.find("driver_address: 10.0.0.1:9999") != std::string::npos); - EXPECT_TRUE(log_output.find("worker_address: 0.0.0.0:8888") != std::string::npos); + EXPECT_TRUE(log_output.find("run_driver: true") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("run_worker: true") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("driver_address: 10.0.0.1:9999") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("worker_address: 0.0.0.0:8888") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; EXPECT_TRUE(log_output.find("worker_targets: fragment1, fragment2, fragment3") != - std::string::npos); - EXPECT_TRUE(log_output.find("config_path: app_config.yaml") != std::string::npos); + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("config_path: 
app_config.yaml") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Application, TestAppHelpOption) { @@ -128,7 +140,8 @@ TEST(Application, TestAppHelpOption) { EXPECT_EXIT(make_application(args), ::testing::ExitedWithCode(0), ".*"); std::string log_output = testing::internal::GetCapturedStdout(); - EXPECT_TRUE(log_output.find("Usage: ") != std::string::npos); + EXPECT_TRUE(log_output.find("Usage: ") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Application, TestAppVersionOption) { @@ -139,7 +152,8 @@ TEST(Application, TestAppVersionOption) { EXPECT_EXIT(make_application(args), ::testing::ExitedWithCode(0), ".*"); std::string log_output = testing::internal::GetCapturedStdout(); - EXPECT_TRUE(log_output.find("0.0.0") != std::string::npos) << log_output; + EXPECT_TRUE(log_output.find("0.0.0") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Application, TestAddFragment) { @@ -210,7 +224,9 @@ TEST(Application, TestReservedFragmentName) { auto fragment = app->make_fragment("all"); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("Fragment name 'all' is reserved") != std::string::npos); + EXPECT_TRUE(log_output.find("Fragment name 'all' is reserved") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/core/arg.cpp b/tests/core/arg.cpp index 1395bc9..01f4d98 100644 --- a/tests/core/arg.cpp +++ b/tests/core/arg.cpp @@ -374,8 +374,11 @@ TEST(Arg, TestArgListAsError) { // an error will have been logged about the failed parsing std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") != std::string::npos); - EXPECT_TRUE(log_output.find("Unable to parse YAML node") != std::string::npos); + EXPECT_TRUE(log_output.find("error") != std::string::npos) << "=== LOG ===\n" + << log_output << 
"\n===========\n"; + EXPECT_TRUE(log_output.find("Unable to parse YAML node") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Arg, TestArgListDescription) { diff --git a/tests/core/condition_classes.cpp b/tests/core/condition_classes.cpp index 95228b7..7f4aa99 100644 --- a/tests/core/condition_classes.cpp +++ b/tests/core/condition_classes.cpp @@ -150,8 +150,11 @@ TEST_F(ConditionClassesWithGXFContext, TestCountConditionInitializeWithoutSpec) count.initialize(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") != std::string::npos); - EXPECT_TRUE(log_output.find("No component spec") != std::string::npos); + EXPECT_TRUE(log_output.find("error") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("No component spec") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(ConditionClassesWithGXFContext, TestCountConditionInitializeWithUnrecognizedArg) { @@ -162,8 +165,11 @@ TEST_F(ConditionClassesWithGXFContext, TestCountConditionInitializeWithUnrecogni condition->initialize(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("warning") != std::string::npos); - EXPECT_TRUE(log_output.find("'undefined_arg' not found in spec_.params") != std::string::npos); + EXPECT_TRUE(log_output.find("warning") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("'undefined_arg' not found in spec_.params") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(ConditionClasses, TestDownstreamMessageAffordableCondition) { @@ -350,8 +356,11 @@ TEST_F(ConditionClassesWithGXFContext, TestPeriodicConditionInitializeWithoutSpe periodic.initialize(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") != 
std::string::npos); - EXPECT_TRUE(log_output.find("No component spec") != std::string::npos); + EXPECT_TRUE(log_output.find("error") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("No component spec") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(ConditionClassesWithGXFContext, TestPeriodicConditionInitializeWithArg) { @@ -405,8 +414,11 @@ TEST_F(ConditionClassesWithGXFContext, TestPeriodicConditionInitializeWithUnreco condition->initialize(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("warning") != std::string::npos); - EXPECT_TRUE(log_output.find("'undefined_arg' not found in spec_.params") != std::string::npos); + EXPECT_TRUE(log_output.find("warning") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("'undefined_arg' not found in spec_.params") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/core/config.cpp b/tests/core/config.cpp index 3f14bbf..f25540d 100644 --- a/tests/core/config.cpp +++ b/tests/core/config.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -45,8 +45,11 @@ TEST(Config, TestNonexistentFile) { // verify expected warning was logged to stderr std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("warning") != std::string::npos); - EXPECT_TRUE(log_output.find("Config file 'nonexistent.yaml' doesn't exist") != std::string::npos); + EXPECT_TRUE(log_output.find("warning") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Config file 'nonexistent.yaml' doesn't exist") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/core/fragment.cpp b/tests/core/fragment.cpp index 4489a7e..4e8f310 100644 --- a/tests/core/fragment.cpp +++ b/tests/core/fragment.cpp @@ -101,9 +101,12 @@ TEST(Fragment, TestFragmentConfig) { testing::internal::CaptureStderr(); F.config(config_file); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("warning") != std::string::npos); + EXPECT_TRUE(log_output.find("warning") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; EXPECT_TRUE(log_output.find("Config object was already created. 
Overwriting...") != - std::string::npos); + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Fragment, TestFragmentConfigNestedArgs) { @@ -145,9 +148,12 @@ TEST(Fragment, TestFragmentFromConfigNonexistentKey) { // verify that an error is logged when the key is not in the YAML file std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") != std::string::npos); + EXPECT_TRUE(log_output.find("error") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; EXPECT_TRUE(log_output.find("Unable to find the parameter item/map with key 'non-existent'") != - std::string::npos); + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Fragment, TestFragmentConfigNonexistentFile) { @@ -161,8 +167,11 @@ TEST(Fragment, TestFragmentConfigNonexistentFile) { // verify that an error is logged when the YAML file doesn't exist std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("warning") != std::string::npos); - EXPECT_TRUE(log_output.find("Config file 'nonexistent.yaml' doesn't exist") != std::string::npos); + EXPECT_TRUE(log_output.find("warning") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Config file 'nonexistent.yaml' doesn't exist") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Fragment, TestFragmentGraph) { diff --git a/tests/core/logger.cpp b/tests/core/logger.cpp index 79122b5..dbe82ea 100644 --- a/tests/core/logger.cpp +++ b/tests/core/logger.cpp @@ -50,8 +50,11 @@ TEST(Logger, TestLoggingPattern) { // test that the specified pattern includes the thread, but omits the message std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("[thread") != std::string::npos); - EXPECT_TRUE(log_output.find("my_message") == std::string::npos); + 
EXPECT_TRUE(log_output.find("[thread") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("my_message") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // restore the original log level set_log_level(orig_level); @@ -287,30 +290,49 @@ TEST_P(LevelParameterizedTestFixture, TestLoggingMacros) { switch (desired_level) { case LogLevel::TRACE: - EXPECT_TRUE(log_output.find("trace") != std::string::npos); + EXPECT_TRUE(log_output.find("trace") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; break; case LogLevel::DEBUG: - EXPECT_TRUE(log_output.find("debug") != std::string::npos); - EXPECT_TRUE(log_output.find("unlogged") == std::string::npos); + EXPECT_TRUE(log_output.find("debug") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("unlogged") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; break; case LogLevel::INFO: - EXPECT_TRUE(log_output.find("info") != std::string::npos); - EXPECT_TRUE(log_output.find("unlogged") == std::string::npos); + EXPECT_TRUE(log_output.find("info") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("unlogged") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; break; case LogLevel::WARN: - EXPECT_TRUE(log_output.find("warn") != std::string::npos); - EXPECT_TRUE(log_output.find("unlogged") == std::string::npos); + EXPECT_TRUE(log_output.find("warn") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("unlogged") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; break; case LogLevel::ERROR: - EXPECT_TRUE(log_output.find("error") != std::string::npos); - EXPECT_TRUE(log_output.find("unlogged") == std::string::npos); + EXPECT_TRUE(log_output.find("error") != 
std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("unlogged") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; break; case LogLevel::CRITICAL: - EXPECT_TRUE(log_output.find("critical") != std::string::npos); - EXPECT_TRUE(log_output.find("unlogged") == std::string::npos); + EXPECT_TRUE(log_output.find("critical") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("unlogged") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; break; case LogLevel::OFF: - EXPECT_TRUE(log_output.find("unlogged") == std::string::npos); + EXPECT_TRUE(log_output.find("unlogged") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; break; } diff --git a/tests/core/metadata.cpp b/tests/core/metadata.cpp index 4ccfeea..4d89157 100644 --- a/tests/core/metadata.cpp +++ b/tests/core/metadata.cpp @@ -21,6 +21,7 @@ #include #include +#include "holoscan/core/arg.hpp" #include "holoscan/core/metadata.hpp" #include "dummy_classes.hpp" @@ -72,6 +73,29 @@ TEST(MetadataObject, TestSharedPtrAny) { EXPECT_EQ(*val, *val2); } +TEST(MetadataObject, TestArgConversion) { + std::vector vec{1.0, 2.0, 4.0}; + MetadataObject obj{vec}; + EXPECT_EQ(typeid(vec), obj.value().type()); + + // can assign MetadataObject.value() to Arg + Arg a{"v"}; + // need any_cast here if we want ArgType enums to be set correctly + a = std::any_cast>(obj.value()); + auto arg_type = a.arg_type(); + EXPECT_EQ(arg_type.element_type(), ArgElementType::kFloat32); + EXPECT_EQ(arg_type.container_type(), ArgContainerType::kVector); + + // // can pass Arg.value() to MetadataObject.set_value() without std::any_cast + MetadataObject obj2; + obj2.set_value(a.value()); + EXPECT_EQ(typeid(vec), obj2.value().type()); + auto vec2 = std::any_cast>(obj2.value()); + EXPECT_EQ(vec[0], vec2[0]); + EXPECT_EQ(vec[1], vec2[1]); + EXPECT_EQ(vec[2], 
vec2[2]); +} + TEST(MetadataDictionary, TestConstructor) { MetadataDictionary d{}; @@ -163,13 +187,10 @@ TEST(MetadataDictionary, TestMetadataPolicy) { MetadataDictionary d{}; d.set("patient name", "John Doe"s); - EXPECT_EQ(d.policy(), MetadataPolicy::kUpdate); - EXPECT_EQ(d.get("patient name"), "John Doe"s); + EXPECT_EQ(d.policy(), MetadataPolicy::kRaise); // raise if new value is provided for existing key - d.policy(MetadataPolicy::kRaise); EXPECT_THROW(d.set("patient name", "Mr. Smith"s), std::runtime_error); - EXPECT_EQ(d.policy(), MetadataPolicy::kRaise); // reject new value d.policy(MetadataPolicy::kReject); diff --git a/tests/core/native_operator.cpp b/tests/core/native_operator.cpp index dbcf7aa..a92847d 100644 --- a/tests/core/native_operator.cpp +++ b/tests/core/native_operator.cpp @@ -101,8 +101,11 @@ TEST(NativeOperatorApp, TestNativeOperatorApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("value1: 1") != std::string::npos); - EXPECT_TRUE(log_output.find("value2: 100") != std::string::npos); + EXPECT_TRUE(log_output.find("value1: 1") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("value2: 100") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/core/operator_spec.cpp b/tests/core/operator_spec.cpp index 804abbc..b82dac6 100644 --- a/tests/core/operator_spec.cpp +++ b/tests/core/operator_spec.cpp @@ -55,7 +55,9 @@ TEST(OperatorSpec, TestOperatorSpecInput) { // duplicate name spec.input("a"); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("already exists") != std::string::npos); + EXPECT_TRUE(log_output.find("already exists") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } struct OperatorSpecTestParam { @@ -93,7 +95,9 @@ TEST_P(OperatorSpecTest, TestOperatorSpecInputSize) { 
// duplicate name spec.input(param.name); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("already exists") != std::string::npos); + EXPECT_TRUE(log_output.find("already exists") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(OperatorSpec, TestOperatorSpectOutput) { @@ -111,7 +115,9 @@ TEST(OperatorSpec, TestOperatorSpectOutput) { // duplicate name spec.output("a"); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("already exists") != std::string::npos); + EXPECT_TRUE(log_output.find("already exists") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(OperatorSpec, TestOperatorSpecParam) { @@ -138,7 +144,9 @@ TEST(OperatorSpec, TestOperatorSpecParam) { // repeating a key will not add an additional parameter spec.param(p, "beta", "headline1", "description4"); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("already exists") != std::string::npos); + EXPECT_TRUE(log_output.find("already exists") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; EXPECT_EQ(spec.params().size(), 1); } @@ -245,7 +253,9 @@ TEST(OperatorSpec, TestOperatorSpecParamVector) { EXPECT_EQ(spec.params().size(), 1); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("already exists") != std::string::npos); + EXPECT_TRUE(log_output.find("already exists") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; EXPECT_EQ(spec.params().size(), 1); } diff --git a/tests/core/parameter.cpp b/tests/core/parameter.cpp index 917c940..9cba2fe 100644 --- a/tests/core/parameter.cpp +++ b/tests/core/parameter.cpp @@ -172,7 +172,8 @@ TEST(Parameter, TestMetaParameterFormatter) { testing::internal::CaptureStderr(); HOLOSCAN_LOG_INFO("Formatted parameter value: {}", p); std::string log_output = 
testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("5") != std::string::npos); + EXPECT_TRUE(log_output.find("5") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } // simple test with format option for formatter feature @@ -184,7 +185,8 @@ TEST(Parameter, TestMetaParameterFormatterSyntax) { testing::internal::CaptureStderr(); HOLOSCAN_LOG_INFO("Formatted parameter value: {:0.3f}", seconds); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("1.320") != std::string::npos); + EXPECT_TRUE(log_output.find("1.320") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/core/system_resource_manager.cpp b/tests/core/system_resource_manager.cpp index 99e7930..6427819 100644 --- a/tests/core/system_resource_manager.cpp +++ b/tests/core/system_resource_manager.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -94,8 +94,8 @@ TEST(SystemResourceManager, TestReportCPUResourceInfo) { EXPECT_GE(cpuinfo.shared_memory_available, 0); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("[error]") == std::string::npos) << "Log message:\n" - << log_output << "\n"; + EXPECT_TRUE(log_output.find("[error]") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(SystemResourceManager, TestReportGPUResourceInfo) { @@ -158,8 +158,8 @@ TEST(SystemResourceManager, TestReportGPUResourceInfo) { } std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("[error]") == std::string::npos) << "Log message:\n" - << log_output << "\n"; + EXPECT_TRUE(log_output.find("[error]") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(SystemResourceManager, TestGetCPUInfo) { @@ -209,8 +209,8 @@ TEST(SystemResourceManager, TestGetCPUInfo) { HOLOSCAN_LOG_INFO("CPU memory usage: {}", cpu_info.memory_usage); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("[error]") == std::string::npos) << "Log message:\n" - << log_output << "\n"; + EXPECT_TRUE(log_output.find("[error]") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(SystemResourceManager, TestGetGPUInfo) { @@ -298,8 +298,8 @@ TEST(SystemResourceManager, TestGetGPUInfo) { } std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("[error]") == std::string::npos) << "Log message:\n" - << log_output << "\n"; + EXPECT_TRUE(log_output.find("[error]") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/data/app_config.yaml b/tests/data/app_config.yaml index 95ac7f7..b65fc7c 100644 --- a/tests/data/app_config.yaml +++ 
b/tests/data/app_config.yaml @@ -191,6 +191,7 @@ v4l2_video_capture: width: 320 height: 240 pixel_format: "auto" + pass_through: false device: "/dev/video0" exposure_time: 500 gain: 100 diff --git a/tests/data/validation_frames/aja_capture/0001.png b/tests/data/validation_frames/aja_capture/0001.png new file mode 100644 index 0000000..00b60d9 Binary files /dev/null and b/tests/data/validation_frames/aja_capture/0001.png differ diff --git a/tests/data/validation_frames/aja_capture/0002.png b/tests/data/validation_frames/aja_capture/0002.png new file mode 100644 index 0000000..00b60d9 Binary files /dev/null and b/tests/data/validation_frames/aja_capture/0002.png differ diff --git a/tests/data/validation_frames/aja_capture/0003.png b/tests/data/validation_frames/aja_capture/0003.png new file mode 100644 index 0000000..00b60d9 Binary files /dev/null and b/tests/data/validation_frames/aja_capture/0003.png differ diff --git a/tests/data/validation_frames/aja_capture/0004.png b/tests/data/validation_frames/aja_capture/0004.png new file mode 100644 index 0000000..00b60d9 Binary files /dev/null and b/tests/data/validation_frames/aja_capture/0004.png differ diff --git a/tests/data/validation_frames/aja_capture/0005.png b/tests/data/validation_frames/aja_capture/0005.png new file mode 100644 index 0000000..00b60d9 Binary files /dev/null and b/tests/data/validation_frames/aja_capture/0005.png differ diff --git a/tests/data/validation_frames/aja_capture/0006.png b/tests/data/validation_frames/aja_capture/0006.png new file mode 100644 index 0000000..00b60d9 Binary files /dev/null and b/tests/data/validation_frames/aja_capture/0006.png differ diff --git a/tests/data/validation_frames/aja_capture/0007.png b/tests/data/validation_frames/aja_capture/0007.png new file mode 100644 index 0000000..00b60d9 Binary files /dev/null and b/tests/data/validation_frames/aja_capture/0007.png differ diff --git a/tests/data/validation_frames/aja_capture/0008.png 
b/tests/data/validation_frames/aja_capture/0008.png new file mode 100644 index 0000000..00b60d9 Binary files /dev/null and b/tests/data/validation_frames/aja_capture/0008.png differ diff --git a/tests/data/validation_frames/aja_capture/0009.png b/tests/data/validation_frames/aja_capture/0009.png new file mode 100644 index 0000000..00b60d9 Binary files /dev/null and b/tests/data/validation_frames/aja_capture/0009.png differ diff --git a/tests/data/validation_frames/aja_capture/0010.png b/tests/data/validation_frames/aja_capture/0010.png new file mode 100644 index 0000000..00b60d9 Binary files /dev/null and b/tests/data/validation_frames/aja_capture/0010.png differ diff --git a/tests/data/validation_frames/aja_capture/cpp_aja_capture.patch b/tests/data/validation_frames/aja_capture/cpp_aja_capture.patch new file mode 100644 index 0000000..4c4daa4 --- /dev/null +++ b/tests/data/validation_frames/aja_capture/cpp_aja_capture.patch @@ -0,0 +1,25 @@ +--- a/examples/aja_capture/cpp/aja_capture.cpp ++++ b/examples/aja_capture/cpp/aja_capture2.cpp +@@ -19,6 +19,12 @@ + #include + #include + ++#ifdef RECORD_OUTPUT ++#include ++#include ++#include ++#endif ++ + class App : public holoscan::Application { + public: + void compose() override { +@@ -29,6 +35,9 @@ class App : public holoscan::Application { + + // Flow definition + add_flow(source, visualizer, {{"video_buffer_output", "receivers"}}); ++ ++ // Recorder to validate the video output ++ RECORDER(visualizer); + } + }; + diff --git a/tests/data/validation_frames/aja_capture/python_aja_capture.patch b/tests/data/validation_frames/aja_capture/python_aja_capture.patch new file mode 100644 index 0000000..020c719 --- /dev/null +++ b/tests/data/validation_frames/aja_capture/python_aja_capture.patch @@ -0,0 +1,37 @@ +--- a/../examples/aja_capture/python/aja_capture.py ++++ b/../examples/aja_capture/python/aja_capture2.py +@@ -19,8 +19,8 @@ import os + + from holoscan.conditions import CountCondition + from holoscan.core import 
Application +-from holoscan.operators import AJASourceOp, HolovizOp +- ++from holoscan.operators import AJASourceOp, HolovizOp, VideoStreamRecorderOp, FormatConverterOp ++from holoscan.resources import UnboundedAllocator + + class AJACaptureApp(Application): + """ +@@ -54,6 +54,23 @@ class AJACaptureApp(Application): + ) + + self.add_flow(source, visualizer, {("video_buffer_output", "receivers")}) ++ recorder_format_converter = FormatConverterOp( ++ self, ++ name="recorder_format_converter", ++ in_dtype="rgba8888", ++ out_dtype="rgb888", ++ pool=UnboundedAllocator(self, name="pool") ++ ) ++ recorder = VideoStreamRecorderOp( ++ self, ++ name="recorder", ++ **self.kwargs("recorder") ++ ) ++ ++ visualizer.add_arg(allocator=UnboundedAllocator(self, name="allocator")) ++ ++ self.add_flow(visualizer, recorder_format_converter, {("render_buffer_output", "source_video")}) ++ self.add_flow(recorder_format_converter, recorder) + + + def main(config_file): diff --git a/tests/data/video_replayer_apps.yaml b/tests/data/video_replayer_apps.yaml new file mode 100644 index 0000000..e9cd98a --- /dev/null +++ b/tests/data/video_replayer_apps.yaml @@ -0,0 +1,33 @@ +%YAML 1.2 +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+--- +replayer: + directory: "../data/racerx" + basename: "racerx" + frame_rate: 0 # as specified in timestamps + repeat: false # default: false + realtime: true # default: true + count: 10 # default: 0 (no frame count restriction) + +holoviz: + width: 960 + height: 540 + headless: true + tensors: + - name: "" + type: color + opacity: 1.0 + priority: 0 diff --git a/tests/flow_tracking/entity_passthrough.cpp b/tests/flow_tracking/entity_passthrough.cpp index f2762dc..7a7cd2e 100644 --- a/tests/flow_tracking/entity_passthrough.cpp +++ b/tests/flow_tracking/entity_passthrough.cpp @@ -130,7 +130,9 @@ TEST(Graphs, TestFlowTrackingWithEntityPassthrough) { tracker.print(); std::string log_output = testing::internal::GetCapturedStdout(); - EXPECT_TRUE(log_output.find("OneOut,OneInOneOut,OneIn") != std::string::npos); + EXPECT_TRUE(log_output.find("OneOut,OneInOneOut,OneIn") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/flow_tracking/flow_tracking_cycle.cpp b/tests/flow_tracking/flow_tracking_cycle.cpp index b41bcb0..0452361 100644 --- a/tests/flow_tracking/flow_tracking_cycle.cpp +++ b/tests/flow_tracking/flow_tracking_cycle.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -408,8 +408,12 @@ TEST(Graphs, TestFlowTrackingForCycleWithSource) { tracker.print(); std::string log_output = testing::internal::GetCapturedStdout(); - EXPECT_TRUE(log_output.find("OneOut,TwoInOneOut,OneInOneOut,TwoInOneOut") != std::string::npos); - EXPECT_TRUE(log_output.find("TwoInOneOut,OneInOneOut,TwoInOneOut") != std::string::npos); + EXPECT_TRUE(log_output.find("OneOut,TwoInOneOut,OneInOneOut,TwoInOneOut") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("TwoInOneOut,OneInOneOut,TwoInOneOut") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Graphs, TestFlowTrackingForMiddleCycle) { @@ -426,10 +430,18 @@ TEST(Graphs, TestFlowTrackingForMiddleCycle) { tracker.print(); std::string log_output = testing::internal::GetCapturedStdout(); - EXPECT_TRUE(log_output.find("OneOut,TwoInOneOut,OneInOneOut,PingRx") != std::string::npos); - EXPECT_TRUE(log_output.find("OneOut,TwoInOneOut,OneInOneOut,TwoInOneOut") != std::string::npos); - EXPECT_TRUE(log_output.find("TwoInOneOut,OneInOneOut,PingRx") != std::string::npos); - EXPECT_TRUE(log_output.find("TwoInOneOut,OneInOneOut,TwoInOneOut") != std::string::npos); + EXPECT_TRUE(log_output.find("OneOut,TwoInOneOut,OneInOneOut,PingRx") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("OneOut,TwoInOneOut,OneInOneOut,TwoInOneOut") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("TwoInOneOut,OneInOneOut,PingRx") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("TwoInOneOut,OneInOneOut,TwoInOneOut") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Graphs, TestFlowTrackingForCycleWithLeaf) { @@ -446,8 
+458,12 @@ TEST(Graphs, TestFlowTrackingForCycleWithLeaf) { tracker.print(); std::string log_output = testing::internal::GetCapturedStdout(); - EXPECT_TRUE(log_output.find("root,middle,leaf") != std::string::npos); - EXPECT_TRUE(log_output.find("root,middle,root") != std::string::npos); + EXPECT_TRUE(log_output.find("root,middle,leaf") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("root,middle,root") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Graphs, TestFlowTrackingForTwoRootsOneCycle) { @@ -465,8 +481,12 @@ TEST(Graphs, TestFlowTrackingForTwoRootsOneCycle) { std::string log_output = testing::internal::GetCapturedStdout(); EXPECT_TRUE(log_output.find("middle2,last,middle2") != std::string::npos); - EXPECT_TRUE(log_output.find("root1,middle1,middle2,last,middle2") != std::string::npos); - EXPECT_TRUE(log_output.find("root2,middle2,last,middle2") != std::string::npos); + EXPECT_TRUE(log_output.find("root1,middle1,middle2,last,middle2") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("root2,middle2,last,middle2") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Graphs, TestFlowTrackingForTwoCyclesVariant1) { @@ -483,8 +503,12 @@ TEST(Graphs, TestFlowTrackingForTwoCyclesVariant1) { tracker.print(); std::string log_output = testing::internal::GetCapturedStdout(); - EXPECT_TRUE(log_output.find("middle,end,middle") != std::string::npos); - EXPECT_TRUE(log_output.find("middle,start,middle") != std::string::npos); + EXPECT_TRUE(log_output.find("middle,end,middle") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("middle,start,middle") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Graphs, TestFlowTrackingForTwoCyclesVariant2) { @@ -501,15 +525,23 @@ TEST(Graphs, 
TestFlowTrackingForTwoCyclesVariant2) { tracker.print(); std::string log_output = testing::internal::GetCapturedStdout(); - EXPECT_TRUE(log_output.find("middle,end,middle") != std::string::npos); - EXPECT_TRUE(log_output.find("middle,start,middle") != std::string::npos); + EXPECT_TRUE(log_output.find("middle,end,middle") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("middle,start,middle") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // The following two paths have only two messages even though 5 messages are sent from the start // This is because no more than 2 messages could travel the following two loops. // The origin of the rest of the messages become middle node and they travel in the above two // loops. - EXPECT_TRUE(log_output.find("start,middle,end,middle") != std::string::npos); - EXPECT_TRUE(log_output.find("start,middle,start") != std::string::npos); + EXPECT_TRUE(log_output.find("start,middle,end,middle") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("start,middle,start") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/operators/operator_classes.cpp b/tests/operators/operator_classes.cpp index d94966b..e604fc0 100644 --- a/tests/operators/operator_classes.cpp +++ b/tests/operators/operator_classes.cpp @@ -84,7 +84,8 @@ TEST_F(OperatorClassesWithGXFContext, TestAJASourceOpChannelFromYAML) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(TestWithGXFContext, TestAJASourceOpChannelFromEnum) { @@ -107,7 +108,8 @@ TEST_F(TestWithGXFContext, 
TestAJASourceOpChannelFromEnum) { std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } #endif @@ -144,7 +146,8 @@ TEST_F(OperatorClassesWithGXFContext, TestFormatConverterOp) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestVideoStreamRecorderOp) { @@ -161,7 +164,8 @@ TEST_F(OperatorClassesWithGXFContext, TestVideoStreamRecorderOp) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestVideoStreamReplayerOp) { @@ -186,7 +190,8 @@ TEST_F(OperatorClassesWithGXFContext, TestVideoStreamReplayerOp) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestSegmentationPostprocessorOp) { @@ -206,7 +211,8 @@ TEST_F(OperatorClassesWithGXFContext, TestSegmentationPostprocessorOp) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == 
std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestHolovizOp) { @@ -233,7 +239,8 @@ TEST_F(OperatorClassesWithGXFContext, TestHolovizOp) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestHolovizOpInputSpec) { @@ -264,7 +271,8 @@ TEST_F(OperatorClassesWithGXFContext, TestInferenceOp) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestInferenceProcessorOp) { @@ -282,7 +290,8 @@ TEST_F(OperatorClassesWithGXFContext, TestInferenceProcessorOp) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestBayerDemosaicOp) { @@ -302,7 +311,8 @@ TEST_F(OperatorClassesWithGXFContext, TestBayerDemosaicOp) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } 
TEST_F(OperatorClassesWithGXFContext, TestBayerDemosaicOpDefaultConstructor) { @@ -311,7 +321,8 @@ TEST_F(OperatorClassesWithGXFContext, TestBayerDemosaicOpDefaultConstructor) { auto op = F.make_operator(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Operator, TestNativeOperatorWithoutFragment) { @@ -323,9 +334,13 @@ TEST(Operator, TestNativeOperatorWithoutFragment) { op.initialize(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("warning") != std::string::npos); - EXPECT_TRUE(log_output.find("Fragment is not set") != std::string::npos); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("warning") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Fragment is not set") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestPingRxOp) { @@ -337,7 +352,8 @@ TEST_F(OperatorClassesWithGXFContext, TestPingRxOp) { EXPECT_EQ(op->name(), name); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestOperatorMetadataAttributes) { @@ -356,6 +372,9 @@ TEST_F(OperatorClassesWithGXFContext, TestOperatorMetadataAttributes) { auto op = F.make_operator(name); EXPECT_EQ(op->name(), name); + // default metadata policy is kRaise + EXPECT_EQ(op->metadata_policy(), MetadataPolicy::kRaise); + // at construction metadata is 
disabled EXPECT_FALSE(op->is_metadata_enabled()); @@ -375,7 +394,8 @@ TEST_F(OperatorClassesWithGXFContext, TestPingTxOp) { EXPECT_EQ(op->name(), name); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestPingTxWithStringName) { @@ -387,7 +407,8 @@ TEST_F(OperatorClassesWithGXFContext, TestPingTxWithStringName) { EXPECT_EQ(op->name(), name); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestAsyncPingRxOp) { @@ -399,7 +420,8 @@ TEST_F(OperatorClassesWithGXFContext, TestAsyncPingRxOp) { EXPECT_EQ(op->name(), name); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestAsyncPingTxOp) { @@ -411,7 +433,8 @@ TEST_F(OperatorClassesWithGXFContext, TestAsyncPingTxOp) { EXPECT_EQ(op->name(), name); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestV4L2VideoCaptureOp) { @@ -423,6 +446,7 @@ TEST_F(OperatorClassesWithGXFContext, TestV4L2VideoCaptureOp) { ArgList kwargs{Arg{"device", std::string("/dev/video0")}, Arg{"pixel_format", std::string("auto")}, + Arg{"pass_through", false}, Arg{"width", width}, Arg{"height", height}, Arg{"allocator", 
F.make_resource("pool")}, @@ -437,7 +461,8 @@ TEST_F(OperatorClassesWithGXFContext, TestV4L2VideoCaptureOp) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestV4L2VideoCaptureOpYAMLConfig) { @@ -453,7 +478,8 @@ TEST_F(OperatorClassesWithGXFContext, TestV4L2VideoCaptureOpYAMLConfig) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestV4L2VideoCaptureOpDefaults) { @@ -471,7 +497,8 @@ TEST_F(OperatorClassesWithGXFContext, TestV4L2VideoCaptureOpDefaults) { EXPECT_TRUE(op->description().find("name: " + name) != std::string::npos); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_F(OperatorClassesWithGXFContext, TestInvalidOperatorName) { diff --git a/tests/system/cycle.cpp b/tests/system/cycle.cpp index a57a05d..37fd793 100644 --- a/tests/system/cycle.cpp +++ b/tests/system/cycle.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -267,7 +267,9 @@ TEST(Graphs, CycleWithSource) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("OneInOneOut count 10") != std::string::npos); + EXPECT_TRUE(log_output.find("OneInOneOut count 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Graphs, BroadcastInCycle) { @@ -279,7 +281,9 @@ TEST(Graphs, BroadcastInCycle) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("OneInOneOut count 10") != std::string::npos); + EXPECT_TRUE(log_output.find("OneInOneOut count 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Graphs, BroadcastAndRxInCycle) { @@ -291,8 +295,12 @@ TEST(Graphs, BroadcastAndRxInCycle) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("OneInOneOut count 10") != std::string::npos); - EXPECT_TRUE(log_output.find("OneInOp count 10") != std::string::npos); + EXPECT_TRUE(log_output.find("OneInOneOut count 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("OneInOp count 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Graphs, TestHasCycleForCycleWithSource) { diff --git a/tests/system/demosaic_op_app.cpp b/tests/system/demosaic_op_app.cpp index 48611a8..a8c005f 100644 --- a/tests/system/demosaic_op_app.cpp +++ b/tests/system/demosaic_op_app.cpp @@ -24,9 +24,8 @@ #include "../config.hpp" #include "holoscan/holoscan.hpp" #include "holoscan/operators/bayer_demosaic/bayer_demosaic.hpp" - -#include "ping_tensor_rx_op.hpp" -#include "ping_tensor_tx_op.hpp" +#include "holoscan/operators/ping_tensor_rx/ping_tensor_rx.hpp" +#include "holoscan/operators/ping_tensor_tx/ping_tensor_tx.hpp" 
static HoloscanTestConfig test_config; @@ -35,32 +34,31 @@ class DummyDemosaicApp : public holoscan::Application { void compose() override { using namespace holoscan; - int32_t rows = 3840; - int32_t columns = 2160; - int32_t channels = 3; std::string tensor_name{"signal"}; auto tx = make_operator("tx", - Arg("rows", rows), - Arg("columns", columns), - Arg("channels", channels), + Arg("rows", rows_), + Arg("columns", columns_), + Arg("channels", channels_), Arg("tensor_name", tensor_name), Arg("storage_type", std::string("device")), make_condition(3)); auto cuda_stream_pool = make_resource("cuda_stream", 0, 0, 0, 1, 5); if (explicit_stream_pool_init_) { cuda_stream_pool->initialize(); } + bool generate_alpha = false; + int32_t out_channels = generate_alpha ? 4 : 3; ArgList demosaic_arglist = ArgList{ Arg("in_tensor_name", tensor_name), Arg("out_tensor_name", tensor_name), - Arg("generate_alpha", false), + Arg("generate_alpha", generate_alpha), Arg("bayer_grid_pos", 2), Arg("interpolation_mode", 0), - Arg("pool", make_resource("pool", 1, rows * columns * channels, 2)), + Arg("pool", make_resource("pool", 1, rows_ * columns_ * out_channels, 2)), Arg("cuda_stream_pool", cuda_stream_pool)}; auto bayer_demosaic = make_operator("bayer_demosaic", demosaic_arglist); - auto rx = make_operator("rx", Arg("tensor_name", tensor_name)); + auto rx = make_operator("rx"); add_flow(tx, bayer_demosaic); add_flow(bayer_demosaic, rx); } @@ -69,9 +67,16 @@ class DummyDemosaicApp : public holoscan::Application { void set_storage_type(const std::string& storage_type) { storage_type_ = storage_type; } + void set_rows(int32_t rows) { rows_ = rows; } + void set_columns(int32_t columns) { columns_ = columns; } + void set_channels(int32_t channels) { channels_ = channels; } + private: bool explicit_stream_pool_init_ = false; std::string storage_type_ = std::string("device"); + int32_t rows_ = 3840; + int32_t columns_ = 3840; + int32_t channels_ = 1; }; class 
DemosaicStorageParameterizedTestFixture : public ::testing::TestWithParam {}; @@ -94,16 +99,22 @@ TEST_P(DemosaicStorageParameterizedTestFixture, TestDummyDemosaicApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("Graph activation failed") == std::string::npos); + EXPECT_TRUE(log_output.find("Graph activation failed") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // verify that there are now no warnings about GPUDevice not being found std::string resource_warning = "cannot find Resource of type: nvidia::gxf::GPUDevice"; - EXPECT_TRUE(log_output.find(resource_warning) == std::string::npos); + EXPECT_TRUE(log_output.find(resource_warning) == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // Verify that BlockMemoryPool and CudaStreamPool did not get initialized on a separate entity // from DummyDemosaicApp. (check for absence of warning from GXFResource::initialize). 
std::string graph_entity_warning = "initialized independent of a parent entity"; - EXPECT_TRUE(log_output.find(graph_entity_warning) == std::string::npos); + EXPECT_TRUE(log_output.find(graph_entity_warning) == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DemosaicOpApp, TestDummyDemosaicAppWithExplicitInit) { @@ -119,14 +130,94 @@ TEST(DemosaicOpApp, TestDummyDemosaicAppWithExplicitInit) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("Graph activation failed") == std::string::npos); + EXPECT_TRUE(log_output.find("Graph activation failed") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // verify that there are now no warnings about GPUDevice not being found std::string resource_warning = "cannot find Resource of type: nvidia::gxf::GPUDevice"; - EXPECT_TRUE(log_output.find(resource_warning) == std::string::npos); + EXPECT_TRUE(log_output.find(resource_warning) == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // Due to `set_explicit_stream_pool_init = true` we expect to see a warning from // GXFResource::initialize due to explicit initialization of a resource to its own entity. 
std::string graph_entity_warning = "initialized independent of a parent entity"; - EXPECT_TRUE(log_output.find(graph_entity_warning) != std::string::npos); + EXPECT_TRUE(log_output.find(graph_entity_warning) != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(DemosaicOpApp, TestDummyDemosaicAppWithOddRows) { + // Test fix for issue 4313690 (failure to initialize graph when using BayerDemosaicOp) + using namespace holoscan; + + auto app = make_application(); + app->set_rows(1919); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + EXPECT_THROW(app->run(), std::runtime_error); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Input tensor must have an even number of rows") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(DemosaicOpApp, TestDummyDemosaicAppWithOddColumns) { + // Test fix for issue 4313690 (failure to initialize graph when using BayerDemosaicOp) + using namespace holoscan; + + auto app = make_application(); + app->set_columns(799); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + EXPECT_THROW(app->run(), std::runtime_error); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Input tensor must have an even number of columns") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(DemosaicOpApp, TestDummyDemosaicAppWithMultipleChannels) { + // Test fix for issue 4313690 (failure to initialize graph when using BayerDemosaicOp) + using namespace holoscan; + + auto app = make_application(); + app->set_channels(2); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + EXPECT_THROW(app->run(), std::runtime_error); + + std::string log_output = 
testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("For 3D HWC input, the number of channels, C, must be 1.") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(DemosaicOpApp, TestDummyDemosaicAppWith2DInput) { + // Test fix for issue 4313690 (failure to initialize graph when using BayerDemosaicOp) + using namespace holoscan; + + auto app = make_application(); + app->set_channels(0); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Graph activation failed") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } diff --git a/tests/system/distributed/distributed_app.cpp b/tests/system/distributed/distributed_app.cpp index 207a007..4bdae26 100644 --- a/tests/system/distributed/distributed_app.cpp +++ b/tests/system/distributed/distributed_app.cpp @@ -41,8 +41,12 @@ TEST(DistributedApp, TestTwoParallelFragmentsApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("SingleOp fragment1.op: 0 - 10") != std::string::npos); - EXPECT_TRUE(log_output.find("SingleOp fragment2.op: 0 - 10") != std::string::npos); + EXPECT_TRUE(log_output.find("SingleOp fragment1.op: 0 - 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("SingleOp fragment2.op: 0 - 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DistributedApp, TestTwoMultiInputsOutputsFragmentsApp) { @@ -54,8 +58,9 @@ TEST(DistributedApp, TestTwoMultiInputsOutputsFragmentsApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos) << "===LogMessage===\n" - << log_output; + 
EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DistributedApp, TestTwoMultipleSingleOutputOperatorsApp) { @@ -67,7 +72,9 @@ TEST(DistributedApp, TestTwoMultipleSingleOutputOperatorsApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos); + EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DistributedApp, TestTwoMultipleSingleOutputOperatorsBroadcastApp) { @@ -79,7 +86,9 @@ TEST(DistributedApp, TestTwoMultipleSingleOutputOperatorsBroadcastApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos); + EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DistributedApp, TestOneTxBroadcastOneRxTwoInputs) { @@ -91,7 +100,9 @@ TEST(DistributedApp, TestOneTxBroadcastOneRxTwoInputs) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos); + EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DistributedApp, TestTwoMultiInputsOutputsFragmentsApp2) { @@ -103,7 +114,9 @@ TEST(DistributedApp, TestTwoMultiInputsOutputsFragmentsApp2) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos); + EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DistributedApp, TestUCXConnectionApp) { @@ -115,7 +128,9 @@ TEST(DistributedApp, TestUCXConnectionApp) { app->run(); 
std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos); + EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DistributedApp, TestUCXConnectionApp2) { @@ -127,7 +142,9 @@ TEST(DistributedApp, TestUCXConnectionApp2) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos); + EXPECT_TRUE(log_output.find("received count: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DistributedApp, TestUCXLinearPipelineApp) { @@ -138,7 +155,9 @@ TEST(DistributedApp, TestUCXLinearPipelineApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("received count: 20") != std::string::npos); + EXPECT_TRUE(log_output.find("received count: 20") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DistributedApp, TestUCXBroadcastApp) { @@ -153,7 +172,9 @@ TEST(DistributedApp, TestUCXBroadcastApp) { EXPECT_TRUE(log_output.find("Rx fragment3.rx message received count: 10") != std::string::npos) << "=== LOG ===\n" << log_output << "\n===========\n"; - EXPECT_TRUE(log_output.find("Rx fragment4.rx message received count: 10") != std::string::npos); + EXPECT_TRUE(log_output.find("Rx fragment4.rx message received count: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DistributedApp, TestUCXBroadCastMultiReceiverApp) { @@ -166,8 +187,12 @@ TEST(DistributedApp, TestUCXBroadCastMultiReceiverApp) { std::string log_output = testing::internal::GetCapturedStderr(); EXPECT_TRUE(log_output.find("RxParam fragment2.rx message received (count: 10, size: 2)") != - std::string::npos); - EXPECT_TRUE(log_output.find("Rx fragment4.rx message received count: 10") != 
std::string::npos); + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx fragment4.rx message received count: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(DistributedApp, TestDriverTerminationWithConnectionFailure) { @@ -194,7 +219,9 @@ TEST(DistributedApp, TestDriverTerminationWithConnectionFailure) { // The driver should terminate after the connection failure (after 1 retry) std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("Failed to connect to driver") != std::string::npos); + EXPECT_TRUE(log_output.find("Failed to connect to driver") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // restore the original environment variable if (env_orig) { diff --git a/tests/system/distributed/distributed_demosaic_op_app.cpp b/tests/system/distributed/distributed_demosaic_op_app.cpp index 1bea782..9abe46c 100644 --- a/tests/system/distributed/distributed_demosaic_op_app.cpp +++ b/tests/system/distributed/distributed_demosaic_op_app.cpp @@ -22,10 +22,10 @@ #include #include "../../config.hpp" -#include "../ping_tensor_rx_op.hpp" -#include "../ping_tensor_tx_op.hpp" #include "holoscan/holoscan.hpp" #include "holoscan/operators/bayer_demosaic/bayer_demosaic.hpp" +#include "holoscan/operators/ping_tensor_rx/ping_tensor_rx.hpp" +#include "holoscan/operators/ping_tensor_tx/ping_tensor_tx.hpp" static HoloscanTestConfig test_config; @@ -36,7 +36,7 @@ class GenerateAndDemosaicFragment : public holoscan::Fragment { int32_t rows = 3840; int32_t columns = 2160; - int32_t channels = 3; + int32_t channels = 1; std::string tensor_name{"signal"}; auto tx = make_operator("tx", Arg("rows", rows), @@ -46,16 +46,18 @@ class GenerateAndDemosaicFragment : public holoscan::Fragment { make_condition(3)); auto cuda_stream_pool = make_resource("cuda_stream", 0, 0, 0, 1, 5); + bool generate_alpha = false; + int32_t 
out_channels = generate_alpha ? 4 : 3; ArgList demosaic_arglist = ArgList{ Arg("in_tensor_name", tensor_name), Arg("out_tensor_name", tensor_name), - Arg("generate_alpha", false), + Arg("generate_alpha", generate_alpha), Arg("bayer_grid_pos", 2), Arg("interpolation_mode", 0), // The pool size is set to 10 to prevent memory allocation errors during testing. // Additional memory pool may be required as UcxTransmitter sends data asynchronously // without checking the receiver's queue. - Arg("pool", make_resource("pool", 1, rows * columns * channels, 10)), + Arg("pool", make_resource("pool", 1, rows * columns * out_channels, 10)), Arg("cuda_stream_pool", cuda_stream_pool)}; std::shared_ptr bayer_demosaic; @@ -69,7 +71,7 @@ class RxFragment : public holoscan::Fragment { public: void compose() override { using namespace holoscan; - auto rx = make_operator("rx", Arg("tensor_name", std::string("signal"))); + auto rx = make_operator("rx"); add_operator(rx); } }; @@ -121,9 +123,13 @@ TEST(DistributedDemosaicOpApp, TestDistributedDummyDemosaicApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("Graph activation failed") == std::string::npos); + EXPECT_TRUE(log_output.find("Graph activation failed") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // Currently expect a warning that the CUDA stream ID object will not be serialized // over the distributed connection. 
std::string serializer_warning = "No serializer found for component 'cuda_stream_id_'"; - EXPECT_TRUE(log_output.find(serializer_warning) != std::string::npos); + EXPECT_TRUE(log_output.find(serializer_warning) != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } diff --git a/tests/system/distributed/holoscan_ucx_ports_env.cpp b/tests/system/distributed/holoscan_ucx_ports_env.cpp index df3d426..479bd14 100644 --- a/tests/system/distributed/holoscan_ucx_ports_env.cpp +++ b/tests/system/distributed/holoscan_ucx_ports_env.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -81,6 +81,16 @@ TEST(HOLOSCAN_UCX_PORTS, TestUCXBroadCastMultiReceiverAppLocal) { // use DEBUG log level to be able to check detailed messages in the output set_log_level(LogLevel::DEBUG); + // Collect three unused network ports starting from 50007 for verification + auto unused_ports = get_unused_network_ports(3, 50007, 65535, {}, {50007}); + EXPECT_EQ(unused_ports.size(), 3); + EXPECT_GE(unused_ports[0], 50007); + EXPECT_GT(unused_ports[1], unused_ports[0]); + EXPECT_GT(unused_ports[2], unused_ports[1]); + EXPECT_LE(unused_ports[2], 65535); + + auto verification_str = fmt::format("unused_ports={}", fmt::join(unused_ports, ",")); + // 'AppDriver::launch_fragments_async()' path will be tested. 
auto app = make_application(); @@ -91,12 +101,16 @@ TEST(HOLOSCAN_UCX_PORTS, TestUCXBroadCastMultiReceiverAppLocal) { std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("unused_ports=50007,50008,50009") != std::string::npos) + EXPECT_TRUE(log_output.find(verification_str) != std::string::npos) << "=== LOG ===\n" << log_output << "\n===========\n"; EXPECT_TRUE(log_output.find("RxParam fragment2.rx message received (count: 10, size: 2)") != - std::string::npos); - EXPECT_TRUE(log_output.find("Rx fragment4.rx message received count: 10") != std::string::npos); + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx fragment4.rx message received count: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } // restore the log level @@ -118,6 +132,16 @@ TEST(HOLOSCAN_UCX_PORTS, TestUCXBroadCastMultiReceiverAppWorker) { // use DEBUG log level to be able to check detailed messages in the output set_log_level(LogLevel::DEBUG); + // Collect three unused network ports including port numbers 50101, 50105 for verification + auto unused_ports = get_unused_network_ports(3, 50101, 65535, {}, {50101, 50105}); + EXPECT_EQ(unused_ports.size(), 3); + EXPECT_GE(unused_ports[0], 50007); + EXPECT_GT(unused_ports[1], unused_ports[0]); + EXPECT_GT(unused_ports[2], unused_ports[1]); + EXPECT_LE(unused_ports[2], 65535); + + auto verification_str = fmt::format("unused_ports={}", fmt::join(unused_ports, ",")); + // With this arguments, this will go through 'AppWorkerServiceImpl::GetAvailablePorts()' path std::vector args{"app", "--driver", "--worker", "--fragments=all"}; auto app = make_application(args); @@ -129,12 +153,16 @@ TEST(HOLOSCAN_UCX_PORTS, TestUCXBroadCastMultiReceiverAppWorker) { std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("unused_ports=50101,50105,50106") != std::string::npos) + 
EXPECT_TRUE(log_output.find(verification_str) != std::string::npos) << "=== LOG ===\n" << log_output << "\n===========\n"; EXPECT_TRUE(log_output.find("RxParam fragment2.rx message received (count: 10, size: 2)") != - std::string::npos); - EXPECT_TRUE(log_output.find("Rx fragment4.rx message received count: 10") != std::string::npos); + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx fragment4.rx message received count: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } // restore the log level diff --git a/tests/system/distributed/standalone_fragments.cpp b/tests/system/distributed/standalone_fragments.cpp index 30409be..f6f7eb6 100644 --- a/tests/system/distributed/standalone_fragments.cpp +++ b/tests/system/distributed/standalone_fragments.cpp @@ -103,8 +103,12 @@ TEST(DistributedApp, TestStandaloneFragments) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("Operator: tx, Index: 10") != std::string::npos); - EXPECT_TRUE(log_output.find("Operator: rx, Index: 5") != std::string::npos); + EXPECT_TRUE(log_output.find("Operator: tx, Index: 10") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Operator: rx, Index: 5") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/system/distributed/ucx_message_serialization_ping_app.cpp b/tests/system/distributed/ucx_message_serialization_ping_app.cpp index 7df66cc..886eed5 100644 --- a/tests/system/distributed/ucx_message_serialization_ping_app.cpp +++ b/tests/system/distributed/ucx_message_serialization_ping_app.cpp @@ -74,8 +74,9 @@ TEST_P(MessageTypeParmeterizedTestFixture, TestMessageSerializationApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("Found expected value 
in deserialized message.") != - std::string::npos); + EXPECT_TRUE(log_output.find("Found expected value in deserialized message.") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } INSTANTIATE_TEST_CASE_P(MessageSerializationAppTests, MessageTypeParmeterizedTestFixture, @@ -166,10 +167,13 @@ TEST_P(UcxMessageTypeParmeterizedTestFixture, TestUcxMessageSerializationApp) { // check for the string that gets printed if receive value validation succeeded std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("Found expected value in deserialized message.") != - std::string::npos); + EXPECT_TRUE(log_output.find("Found expected value in deserialized message.") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; - EXPECT_TRUE(remove_ignored_errors(log_output).find("error") == std::string::npos); + EXPECT_TRUE(remove_ignored_errors(log_output).find("error") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // restore the original log level if (message_type == MessageType::VEC_DOUBLE_LARGE) { diff --git a/tests/system/exception_handling.cpp b/tests/system/exception_handling.cpp index fc6b4cf..aa764b8 100644 --- a/tests/system/exception_handling.cpp +++ b/tests/system/exception_handling.cpp @@ -71,6 +71,9 @@ class MinimalThrowOp : public Operator { Operator::initialize(); + // Make sure that calling initialize() twice does not cause any issues + Operator::initialize(); + if (throw_type_.get() == ThrowMethod::kInitialize) { throw std::runtime_error("Exception occurred in MinimalThrowOp::initialize"); } @@ -145,10 +148,14 @@ TEST_P(MethodParmeterizedTestFixture, TestMethodExceptionHandling) { std::string log_output = testing::internal::GetCapturedStderr(); if ((throw_method != ThrowMethod::kNone)) { - EXPECT_TRUE(log_output.find("Exception occurred in MinimalThrowOp") != std::string::npos); + EXPECT_TRUE(log_output.find("Exception occurred in 
MinimalThrowOp") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; if (throw_method != ThrowMethod::kInitialize) { // exception in initialize is before graph start, so this would not be printed - EXPECT_TRUE(log_output.find("Graph execution error: ") != std::string::npos); + EXPECT_TRUE(log_output.find("Graph execution error: ") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } } } diff --git a/tests/system/format_converter_op_apps.cpp b/tests/system/format_converter_op_apps.cpp index 022a3d8..075ab86 100644 --- a/tests/system/format_converter_op_apps.cpp +++ b/tests/system/format_converter_op_apps.cpp @@ -22,12 +22,12 @@ #include #include "../config.hpp" -#include "ping_tensor_rx_op.hpp" -#include "ping_tensor_tx_op.hpp" #include "tensor_compare_op.hpp" #include "holoscan/holoscan.hpp" #include "holoscan/operators/format_converter/format_converter.hpp" +#include "holoscan/operators/ping_tensor_rx/ping_tensor_rx.hpp" +#include "holoscan/operators/ping_tensor_tx/ping_tensor_tx.hpp" using namespace holoscan; @@ -57,7 +57,7 @@ class FormatConverterApp : public holoscan::Application { pool, Arg("in_tensor_name", in_tensor_name), Arg("out_tensor_name", out_tensor_name)); - auto rx = make_operator("rx", Arg("tensor_name", out_tensor_name)); + auto rx = make_operator("rx"); add_flow(source, converter, {{"out", "source_video"}}); add_flow(converter, rx, {{"tensor", "in"}}); @@ -84,9 +84,12 @@ void run_app(const std::string& failure_str = "", const std::string& storage_typ } std::string log_output = testing::internal::GetCapturedStderr(); if (failure_str.empty()) { - EXPECT_TRUE(log_output.find("error") == std::string::npos) << log_output; + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } else { - EXPECT_TRUE(log_output.find(failure_str) != std::string::npos) << log_output; + EXPECT_TRUE(log_output.find(failure_str) != std::string::npos) + << 
"=== LOG ===\n" + << log_output << "\n===========\n"; } } diff --git a/tests/system/holoviz_op_apps.cpp b/tests/system/holoviz_op_apps.cpp index 1e38992..f38ef2b 100644 --- a/tests/system/holoviz_op_apps.cpp +++ b/tests/system/holoviz_op_apps.cpp @@ -22,12 +22,12 @@ #include #include "../config.hpp" -#include "ping_tensor_tx_op.hpp" #include "tensor_compare_op.hpp" #include "holoscan/holoscan.hpp" #include "holoscan/operators/format_converter/format_converter.hpp" #include "holoscan/operators/holoviz/holoviz.hpp" +#include "holoscan/operators/ping_tensor_tx/ping_tensor_tx.hpp" using namespace holoscan; @@ -110,9 +110,12 @@ void run_app(StringOrArg enable_arg, const std::string& failure_str = "", } std::string log_output = testing::internal::GetCapturedStderr(); if (failure_str.empty()) { - EXPECT_TRUE(log_output.find("error") == std::string::npos) << log_output; + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } else { - EXPECT_TRUE(log_output.find(failure_str) != std::string::npos) << log_output; + EXPECT_TRUE(log_output.find(failure_str) != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } } diff --git a/tests/system/jobstatistics_app.cpp b/tests/system/jobstatistics_app.cpp index 58911ab..c813085 100644 --- a/tests/system/jobstatistics_app.cpp +++ b/tests/system/jobstatistics_app.cpp @@ -53,7 +53,9 @@ TEST(JobStatisticsApp, TestJobStatisticsDisabled) { app->run(); std::string log_output = testing::internal::GetCapturedStdout(); - EXPECT_TRUE(log_output.find("Job Statistics Report") == std::string::npos); + EXPECT_TRUE(log_output.find("Job Statistics Report") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(JobStatisticsApp, TestJobStatisticsEnabled) { @@ -70,10 +72,14 @@ TEST(JobStatisticsApp, TestJobStatisticsEnabled) { app->run(); std::string console_output = testing::internal::GetCapturedStdout(); - 
EXPECT_TRUE(console_output.find("Job Statistics Report") != std::string::npos); + EXPECT_TRUE(console_output.find("Job Statistics Report") != std::string::npos) + << "=== CONSOLE (stdout) ===\n" + << console_output << "\n===========\n"; // Codelet statistics report is disabled by default - EXPECT_TRUE(console_output.find("Codelet Statistics Report") == std::string::npos); + EXPECT_TRUE(console_output.find("Codelet Statistics Report") == std::string::npos) + << "=== CONSOLE (stdout) ===\n" + << console_output << "\n===========\n"; } TEST(JobStatisticsApp, TestJobStatisticsEnabledCountSet) { @@ -95,11 +101,15 @@ TEST(JobStatisticsApp, TestJobStatisticsEnabledCountSet) { std::string console_output = testing::internal::GetCapturedStdout(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(console_output.find("Job Statistics Report") != std::string::npos); + EXPECT_TRUE(console_output.find("Job Statistics Report") != std::string::npos) + << "=== CONSOLE (stdout) ===\n" + << console_output << "\n===========\n"; // Rely on DEBUG level log output to detect the event_history_count that was set as the // value is not shown in the report itself. 
- EXPECT_TRUE(log_output.find("event_history_count: 35") != std::string::npos); + EXPECT_TRUE(log_output.find("event_history_count: 35") != std::string::npos) + << "=== LOG (stderr) ===\n" + << log_output << "\n===========\n"; } TEST(JobStatisticsApp, TestJobStatisticsCodeletReportEnabled) { @@ -117,8 +127,12 @@ TEST(JobStatisticsApp, TestJobStatisticsCodeletReportEnabled) { app->run(); std::string console_output = testing::internal::GetCapturedStdout(); - EXPECT_TRUE(console_output.find("Job Statistics Report") != std::string::npos); - EXPECT_TRUE(console_output.find("Codelet Statistics Report") != std::string::npos); + EXPECT_TRUE(console_output.find("Job Statistics Report") != std::string::npos) + << "=== CONSOLE (stdout) ===\n" + << console_output << "\n===========\n"; + EXPECT_TRUE(console_output.find("Codelet Statistics Report") != std::string::npos) + << "=== CONSOLE (stdout) ===\n" + << console_output << "\n===========\n"; } TEST(JobStatisticsApp, TestJobStatisticsFilePathSet) { @@ -139,7 +153,9 @@ TEST(JobStatisticsApp, TestJobStatisticsFilePathSet) { app->run(); std::string console_output = testing::internal::GetCapturedStdout(); - EXPECT_TRUE(console_output.find("Job Statistics Report") != std::string::npos); + EXPECT_TRUE(console_output.find("Job Statistics Report") != std::string::npos) + << "=== CONSOLE (stdout) ===\n" + << console_output << "\n===========\n"; // check that the expected JSON file was created EXPECT_TRUE(std::filesystem::exists(file_path)); diff --git a/tests/system/loading_gxf_extension.cpp b/tests/system/loading_gxf_extension.cpp index a6eda97..c856a63 100644 --- a/tests/system/loading_gxf_extension.cpp +++ b/tests/system/loading_gxf_extension.cpp @@ -91,8 +91,8 @@ TEST(Extensions, LoadInsideComposeMethod) { pos = log_output.find("Hello world", pos + 1); } EXPECT_EQ(count, 10) << "Expected to find 10 instances of 'Hello world' in log output, but found " - << count << ".\nLog output:\n" - << log_output; + << count << "\n=== LOG ===\n" 
+ << log_output << "\n===========\n"; } TEST(Extensions, LoadOutsideApp) { @@ -118,8 +118,8 @@ TEST(Extensions, LoadOutsideApp) { pos = log_output.find("Hello world", pos + 1); } EXPECT_EQ(count, 10) << "Expected to find 10 instances of 'Hello world' in log output, but found " - << count << ".\nLog output:\n" - << log_output; + << count << "\n=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Extensions, LoadFromConfigFile) { @@ -142,8 +142,8 @@ TEST(Extensions, LoadFromConfigFile) { pos = log_output.find("Hello world", pos + 1); } EXPECT_EQ(count, 10) << "Expected to find 10 instances of 'Hello world' in log output, but found " - << count << ".\nLog output:\n" - << log_output; + << count << "\n=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(Extensions, LoadFromConfigFileAfterAccessingExecutor) { @@ -168,8 +168,8 @@ TEST(Extensions, LoadFromConfigFileAfterAccessingExecutor) { pos = log_output.find("Hello world", pos + 1); } EXPECT_EQ(count, 10) << "Expected to find 10 instances of 'Hello world' in log output, but found " - << count << ".\nLog output:\n" - << log_output; + << count << "\n=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/system/multi_receiver_operator_ping_app.cpp b/tests/system/multi_receiver_operator_ping_app.cpp index 18af415..17cab5a 100644 --- a/tests/system/multi_receiver_operator_ping_app.cpp +++ b/tests/system/multi_receiver_operator_ping_app.cpp @@ -26,6 +26,8 @@ #include "env_wrapper.hpp" +constexpr int kDefaultNumOfIterations = 10; + namespace { class ValueData { @@ -44,6 +46,8 @@ class ValueData { int data_; }; +// Operator definitions + class PingTxOp : public holoscan::Operator { public: HOLOSCAN_OPERATOR_FORWARD_ARGS(PingTxOp) @@ -66,6 +70,142 @@ class PingTxOp : public holoscan::Operator { int index_ = 1; }; +class PingNullSharedPtrTxOp : public holoscan::Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(PingNullSharedPtrTxOp) + + PingNullSharedPtrTxOp() = 
default; + + void setup(holoscan::OperatorSpec& spec) override { + spec.output>("out1"); + spec.output>("out2"); + } + + void compute(holoscan::InputContext&, holoscan::OutputContext& op_output, + holoscan::ExecutionContext&) override { + auto value1 = std::make_shared(index_++); + op_output.emit(nullptr, "out1"); + + auto value2 = std::make_shared(index_++); + op_output.emit(value2, "out2"); + }; + int index_ = 1; +}; + +class PingRawNullPtrTxOp : public holoscan::Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(PingRawNullPtrTxOp) + + PingRawNullPtrTxOp() = default; + + void setup(holoscan::OperatorSpec& spec) override { + spec.output("out1"); + spec.output("out2"); + } + + void compute(holoscan::InputContext&, holoscan::OutputContext& op_output, + holoscan::ExecutionContext&) override { + static const char values[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + op_output.emit(nullptr, "out1"); + + auto value2 = &values[(index_++) % 16]; + op_output.emit(value2, "out2"); + }; + int index_ = 0; +}; + +class PingTensorMapTxOp : public holoscan::Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(PingTensorMapTxOp) + + PingTensorMapTxOp() = default; + + void setup(holoscan::OperatorSpec& spec) override { + spec.output("out1"); + spec.output("out2"); + } + + void initialize() { + // Create an allocator for the tensors + auto frag = fragment(); + allocator_ = frag->make_resource("allocator"); + add_arg(allocator_.get()); + + Operator::initialize(); + } + + void compute(holoscan::InputContext&, holoscan::OutputContext& op_output, + holoscan::ExecutionContext& context) override { + const nvidia::gxf::Shape out_shape1{1, 2, 3}; + const nvidia::gxf::Shape out_shape2{3, 2, 1}; + const nvidia::gxf::Shape out_shape3{2, 3, 4}; + const nvidia::gxf::Shape out_shape4{4, 3, 2}; + + // Get Handle to underlying nvidia::gxf::Allocator from std::shared_ptr + auto pool = nvidia::gxf::Handle::Create(context.context(), + 
allocator_->gxf_cid()); + const auto maybe_tensormap1 = nvidia::gxf::CreateTensorMap( + context.context(), + pool.value(), + {{"tensor_a", + nvidia::gxf::MemoryStorageType::kDevice, + out_shape1, + nvidia::gxf::PrimitiveType::kUnsigned8, + 0, + nvidia::gxf::ComputeTrivialStrides( + out_shape1, nvidia::gxf::PrimitiveTypeSize(nvidia::gxf::PrimitiveType::kUnsigned8))}, + {"tensor_common", + nvidia::gxf::MemoryStorageType::kDevice, + out_shape2, + nvidia::gxf::PrimitiveType::kUnsigned8, + 0, + nvidia::gxf::ComputeTrivialStrides( + out_shape2, nvidia::gxf::PrimitiveTypeSize(nvidia::gxf::PrimitiveType::kUnsigned8))}}, + false); + + const auto maybe_tensormap2 = nvidia::gxf::CreateTensorMap( + context.context(), + pool.value(), + {{"tensor_c", + nvidia::gxf::MemoryStorageType::kDevice, + out_shape3, + nvidia::gxf::PrimitiveType::kUnsigned8, + 0, + nvidia::gxf::ComputeTrivialStrides( + out_shape3, nvidia::gxf::PrimitiveTypeSize(nvidia::gxf::PrimitiveType::kUnsigned8))}, + {"tensor_common", + nvidia::gxf::MemoryStorageType::kDevice, + out_shape4, + nvidia::gxf::PrimitiveType::kUnsigned8, + 0, + nvidia::gxf::ComputeTrivialStrides( + out_shape4, nvidia::gxf::PrimitiveTypeSize(nvidia::gxf::PrimitiveType::kUnsigned8))}}, + false); + + if (!maybe_tensormap1 || !maybe_tensormap2) { + throw std::runtime_error("Failed to create TensorMap"); + } + + auto gxf_entity1 = holoscan::gxf::Entity(maybe_tensormap1.value()); + auto gxf_entity2 = holoscan::gxf::Entity(maybe_tensormap2.value()); + + holoscan::TensorMap tensor_map1; + tensor_map1.insert({"tensor_a", gxf_entity1.get("tensor_a")}); + tensor_map1.insert({"tensor_common", gxf_entity1.get("tensor_common")}); + op_output.emit(tensor_map1, "out1"); + + holoscan::TensorMap tensor_map2; + tensor_map2.insert({"tensor_c", gxf_entity2.get("tensor_c")}); + tensor_map2.insert({"tensor_common", gxf_entity2.get("tensor_common")}); + op_output.emit(tensor_map2, "out2"); + }; + + private: + holoscan::Parameter> allocator_{nullptr}; + int index_ 
= 0; +}; + class PingMxOp : public holoscan::Operator { public: HOLOSCAN_OPERATOR_FORWARD_ARGS(PingMxOp) @@ -133,7 +273,13 @@ class PingRxOp : public holoscan::Operator { std::vector data_vector; data_vector.reserve(value_vector.size()); - for (const auto& item : value_vector) { data_vector.push_back(item->data()); } + for (const auto& item : value_vector) { + if (item) { + data_vector.push_back(item->data()); + } else { + data_vector.push_back(-1); + } + } HOLOSCAN_LOG_INFO("Rx message values: [{}]", fmt::join(data_vector, ", ")); } else { while (true) { @@ -141,7 +287,11 @@ class PingRxOp : public holoscan::Operator { if (!maybe_value) { break; } auto value = maybe_value.value(); HOLOSCAN_LOG_INFO("Rx message received (count: {}, size: 1)", count_++); - HOLOSCAN_LOG_INFO("Rx message value: {}", value->data()); + if (value) { + HOLOSCAN_LOG_INFO("Rx message value: {}", value->data()); + } else { + HOLOSCAN_LOG_INFO("Rx message value: -1"); + } } } }; @@ -152,16 +302,214 @@ class PingRxOp : public holoscan::Operator { int count_ = 1; }; -class PingMultiPort : public holoscan::Application { +class PingRawPtrRxOp : public holoscan::Operator { public: - PingMultiPort(bool should_receive_vector, holoscan::IOSpec::IOSize queue_size, - const std::vector& argv = {}) + HOLOSCAN_OPERATOR_FORWARD_ARGS(PingRawPtrRxOp) + HOLOSCAN_OPERATOR_FORWARD_TEMPLATE() + PingRawPtrRxOp(bool should_receive_vector, holoscan::IOSpec::IOSize queue_size, ArgT&& arg, + ArgsT&&... args) + : should_receive_vector_(should_receive_vector), + queue_size_(queue_size), + Operator(std::forward(arg), std::forward(args)...) 
{} + + PingRawPtrRxOp(bool should_receive_vector, holoscan::IOSpec::IOSize queue_size) + : should_receive_vector_(should_receive_vector), queue_size_(queue_size) {} + + void setup(holoscan::OperatorSpec& spec) override { + if (queue_size_ == holoscan::IOSpec::kSizeOne) { + spec.input("receivers"); + } else { + spec.input>("receivers", queue_size_); + } + } + + void compute(holoscan::InputContext& op_input, holoscan::OutputContext&, + holoscan::ExecutionContext&) override { + if (should_receive_vector_) { + auto maybe_value_vector = op_input.receive>("receivers"); + + if (!maybe_value_vector) { + HOLOSCAN_LOG_INFO("Unable to receive vector of raw pointers: {}", + maybe_value_vector.error().what()); + return; + } + + auto& value_vector = maybe_value_vector.value(); + HOLOSCAN_LOG_INFO("Rx message received (count: {}, size: {})", count_++, value_vector.size()); + + std::vector data_vector; + data_vector.reserve(value_vector.size()); + for (const auto& item : value_vector) { + if (item) { + data_vector.push_back(*item); + } else { + data_vector.push_back('N'); + } + } + HOLOSCAN_LOG_INFO("Rx message values: [{}]", fmt::join(data_vector, ", ")); + } else { + while (true) { + auto maybe_value = op_input.receive("receivers"); + if (!maybe_value) { break; } + auto value = maybe_value.value(); + HOLOSCAN_LOG_INFO("Rx message received (count: {}, size: 1)", count_++); + if (value) { + HOLOSCAN_LOG_INFO("Rx message value: {}", *value); + } else { + HOLOSCAN_LOG_INFO("Rx message value: N"); + } + } + } + }; + + private: + bool should_receive_vector_ = false; + holoscan::IOSpec::IOSize queue_size_ = holoscan::IOSpec::kSizeOne; + int count_ = 1; +}; + +class PingTensorMapRxOp : public holoscan::Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(PingTensorMapRxOp) + HOLOSCAN_OPERATOR_FORWARD_TEMPLATE() + PingTensorMapRxOp(bool should_receive_vector, holoscan::IOSpec::IOSize queue_size, ArgT&& arg, + ArgsT&&... 
args) + : should_receive_vector_(should_receive_vector), + queue_size_(queue_size), + Operator(std::forward(arg), std::forward(args)...) {} + + PingTensorMapRxOp(bool should_receive_vector, holoscan::IOSpec::IOSize queue_size) + : should_receive_vector_(should_receive_vector), queue_size_(queue_size) {} + + void setup(holoscan::OperatorSpec& spec) override { + if (queue_size_ == holoscan::IOSpec::kSizeOne) { + spec.input("receivers"); + } else { + spec.input>("receivers", queue_size_); + } + } + + void compute(holoscan::InputContext& op_input, holoscan::OutputContext&, + holoscan::ExecutionContext&) override { + if (should_receive_vector_) { + auto value_vector = op_input.receive>("receivers").value(); + + HOLOSCAN_LOG_INFO("Rx message received (count: {}, size: {})", count_, value_vector.size()); + + for (const auto& value : value_vector) { + for (const auto& [name, tensor] : value) { + std::vector data_vector; + if (tensor) { + for (const auto& shape : tensor->shape()) { data_vector.push_back(shape); } + } + HOLOSCAN_LOG_INFO( + "Rx message values {} (count: {}): [{}]", name, count_, fmt::join(data_vector, ", ")); + } + } + } else { + while (true) { + auto maybe_value = op_input.receive("receivers"); + if (!maybe_value) { break; } + auto value = maybe_value.value(); + for (const auto& [name, tensor] : value) { + std::vector data_vector; + if (tensor) { + for (const auto& shape : tensor->shape()) { data_vector.push_back(shape); } + } + HOLOSCAN_LOG_INFO( + "Rx message values {} (count: {}): [{}]", name, count_, fmt::join(data_vector, ", ")); + } + } + } + + count_++; + }; + + private: + bool should_receive_vector_ = false; + holoscan::IOSpec::IOSize queue_size_ = holoscan::IOSpec::kSizeOne; + int count_ = 1; +}; + +class PingEntityRxOp : public holoscan::Operator { + public: + HOLOSCAN_OPERATOR_FORWARD_ARGS(PingEntityRxOp) + HOLOSCAN_OPERATOR_FORWARD_TEMPLATE() + PingEntityRxOp(bool should_receive_vector, holoscan::IOSpec::IOSize queue_size, ArgT&& arg, + 
ArgsT&&... args) + : should_receive_vector_(should_receive_vector), + queue_size_(queue_size), + Operator(std::forward(arg), std::forward(args)...) {} + + PingEntityRxOp(bool should_receive_vector, holoscan::IOSpec::IOSize queue_size) + : should_receive_vector_(should_receive_vector), queue_size_(queue_size) {} + + void setup(holoscan::OperatorSpec& spec) override { + if (queue_size_ == holoscan::IOSpec::kSizeOne) { + spec.input("receivers"); + } else { + spec.input>("receivers", queue_size_); + } + } + + void compute(holoscan::InputContext& op_input, holoscan::OutputContext&, + holoscan::ExecutionContext&) override { + if (should_receive_vector_) { + auto value_vector = op_input.receive>("receivers").value(); + + HOLOSCAN_LOG_INFO("Rx message received (count: {}, size: {})", count_, value_vector.size()); + + for (const auto& value : value_vector) { + std::shared_ptr tensor_common = + value.get("tensor_common"); + std::vector data_vector; + if (tensor_common) { + for (const auto& shape : tensor_common->shape()) { data_vector.push_back(shape); } + } + HOLOSCAN_LOG_INFO("Rx message values tensor_common (count: {}): [{}]", + count_, + fmt::join(data_vector, ", ")); + } + } else { + while (true) { + auto maybe_value = op_input.receive("receivers"); + if (!maybe_value) { break; } + auto value = maybe_value.value(); + + std::shared_ptr tensor_common = + value.get("tensor_common"); + std::vector data_vector; + if (tensor_common) { + for (const auto& shape : tensor_common->shape()) { data_vector.push_back(shape); } + } + HOLOSCAN_LOG_INFO("Rx message values tensor_common (count: {}): [{}]", + count_, + fmt::join(data_vector, ", ")); + } + } + + count_++; + }; + + private: + bool should_receive_vector_ = false; + holoscan::IOSpec::IOSize queue_size_ = holoscan::IOSpec::kSizeOne; + int count_ = 1; +}; + +// Application definitions + +class PingMultiPortApp : public holoscan::Application { + public: + PingMultiPortApp(bool should_receive_vector, holoscan::IOSpec::IOSize 
queue_size, + const std::vector& argv = {}) : should_receive_vector_(should_receive_vector), queue_size_(queue_size), Application(argv) {} void compose() override { using namespace holoscan; // Define the tx, mx, rx operators, allowing the tx operator to execute 10 times - auto tx = make_operator("tx", make_condition(10)); + auto tx = + make_operator("tx", make_condition(kDefaultNumOfIterations)); auto mx = make_operator("mx", Arg("multiplier", 3)); auto rx = make_operator("rx", should_receive_vector_, queue_size_); @@ -175,10 +523,38 @@ class PingMultiPort : public holoscan::Application { holoscan::IOSpec::IOSize queue_size_ = holoscan::IOSpec::kSizeOne; }; +template +class PingMultiPortDataApp : public holoscan::Application { + public: + PingMultiPortDataApp(bool should_receive_vector, holoscan::IOSpec::IOSize queue_size, + int count = kDefaultNumOfIterations, + const std::vector& argv = {}) + : should_receive_vector_(should_receive_vector), + queue_size_(queue_size), + count_(count), + Application(argv) {} + + void compose() override { + using namespace holoscan; + + // Define the tx, mx, rx operators, allowing the tx operator to execute 10 times + auto tx = make_operator("tx", make_condition(count_)); + auto rx = make_operator("rx", should_receive_vector_, queue_size_); + + // Define the workflow + add_flow(tx, rx, {{"out1", "receivers"}, {"out2", "receivers"}}); + } + + private: + bool should_receive_vector_ = false; + holoscan::IOSpec::IOSize queue_size_ = holoscan::IOSpec::kSizeOne; + int count_ = kDefaultNumOfIterations; +}; + }; // namespace TEST(MultiReceiverOperatorPingApp, TestPingMultiPortMultiAnySize) { - auto app = holoscan::make_application(true, holoscan::IOSpec::kAnySize); + auto app = holoscan::make_application(true, holoscan::IOSpec::kAnySize); // capture output to check that the expected messages were logged testing::internal::CaptureStderr(); @@ -195,12 +571,12 @@ TEST(MultiReceiverOperatorPingApp, TestPingMultiPortMultiAnySize) { } 
TEST(MultiReceiverOperatorPingApp, TestPingMultiPortSingleAnySize) { - auto app = holoscan::make_application(false, holoscan::IOSpec::kAnySize); + auto app = holoscan::make_application(false, holoscan::IOSpec::kAnySize); // capture output to check that the expected messages were logged testing::internal::CaptureStderr(); - EXPECT_THROW(app->run(), std::runtime_error); + EXPECT_THROW(app->run(), std::invalid_argument); // it is expected that the run will throw an exception because // IOSpec::kAnySize always expects a vector of values when receiving @@ -214,7 +590,7 @@ TEST(MultiReceiverOperatorPingApp, TestPingMultiPortSingleAnySize) { } TEST(MultiReceiverOperatorPingApp, TestPingMultiPortMultiPrecedingCount) { - auto app = holoscan::make_application(true, holoscan::IOSpec::kPrecedingCount); + auto app = holoscan::make_application(true, holoscan::IOSpec::kPrecedingCount); // capture output to check that the expected messages were logged testing::internal::CaptureStderr(); @@ -235,7 +611,7 @@ TEST(MultiReceiverOperatorPingApp, TestPingMultiPortMultiPrecedingCount) { } TEST(MultiReceiverOperatorPingApp, TestPingMultiPortSinglePrecedingCount) { - auto app = holoscan::make_application(false, holoscan::IOSpec::kPrecedingCount); + auto app = holoscan::make_application(false, holoscan::IOSpec::kPrecedingCount); // capture output to check that the expected messages were logged testing::internal::CaptureStderr(); @@ -262,7 +638,7 @@ TEST(MultiReceiverOperatorPingApp, TestPingMultiPortSingleSizeFive) { std::make_pair("HOLOSCAN_EXECUTOR_LOG_LEVEL", "INFO"), // quiet multi_thread_scheduler.cpp }); - auto app = holoscan::make_application(false, holoscan::IOSpec::IOSize(5)); + auto app = holoscan::make_application(false, holoscan::IOSpec::IOSize(5)); // capture output to check that the expected messages were logged testing::internal::CaptureStderr(); @@ -279,8 +655,10 @@ TEST(MultiReceiverOperatorPingApp, TestPingMultiPortSingleSizeFive) { << "=== LOG ===\n" << log_output << 
"\n===========\n"; - EXPECT_TRUE(log_output.find("No message is received from the input port with name 'receivers'") != - std::string::npos) + EXPECT_TRUE( + log_output.find( + "ReceiveError on input port 'receivers': No message received from the input port") != + std::string::npos) << "=== LOG ===\n" << log_output << "\n===========\n"; @@ -299,7 +677,7 @@ TEST(MultiReceiverOperatorPingApp, TestPingMultiPortSingleSizeFive) { } TEST(MultiReceiverOperatorPingApp, TestPingMultiPortInvalidQueueSize) { - auto app = holoscan::make_application(false, holoscan::IOSpec::IOSize(-3)); + auto app = holoscan::make_application(false, holoscan::IOSpec::IOSize(-3)); // capture output to check that the expected messages were logged testing::internal::CaptureStderr(); @@ -313,3 +691,336 @@ TEST(MultiReceiverOperatorPingApp, TestPingMultiPortInvalidQueueSize) { << "=== LOG ===\n" << log_output << "\n===========\n"; } + +TEST(MultiReceiverOperatorPingApp, TestSendingNullSharedPtrMultiAnySize) { + using PingNullSharedPtrApp = PingMultiPortDataApp; + + auto app = holoscan::make_application(true, holoscan::IOSpec::kAnySize); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message received (count: 10, size: 2)") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values: [-1, 20]") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingNullSharedPtrMultiPrecedingCount) { + using PingNullSharedPtrApp = PingMultiPortDataApp; + + auto app = + holoscan::make_application(true, holoscan::IOSpec::kPrecedingCount); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = 
testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message received (count: 10, size: 2)") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values: [-1, 20]") != std::string::npos || + log_output.find("Rx message values: [20, -1]") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingNullSharedPtrSinglePrecedingCount) { + using PingNullSharedPtrApp = PingMultiPortDataApp; + + auto app = + holoscan::make_application(false, holoscan::IOSpec::kPrecedingCount); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message received (count: 20, size: 1)") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message value: -1") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message value: 20") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingNullRawPointerMultiAnySize) { + using PingNullPtrApp = PingMultiPortDataApp; + + auto app = holoscan::make_application(true, holoscan::IOSpec::kAnySize); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message received (count: 10, size: 2)") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + + // since PingRawNullPtrTxOp's output ports (out1, out2) are handled in std::unordered_map order, + // the order of output port creation is not deterministic, so the order of the 
received + // messages may be different. + EXPECT_TRUE(log_output.find("Rx message values: [N, 9]") != std::string::npos || + log_output.find("Rx message values: [9, N]") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingNullRawPointerMultiPrecedingCount) { + using PingNullPtrApp = PingMultiPortDataApp; + + auto app = holoscan::make_application(true, holoscan::IOSpec::kPrecedingCount); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message received (count: 10, size: 2)") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + + // since PingRawNullPtrTxOp's output ports (out1, out2) are handled in std::unordered_map order, + // the order of output port creation is not deterministic, so the order of the received + // messages may be different. 
+ EXPECT_TRUE(log_output.find("Rx message values: [N, 9]") != std::string::npos || + log_output.find("Rx message values: [9, N]") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingNullRawPointerSinglePrecedingCount) { + using PingNullPtrApp = PingMultiPortDataApp; + + auto app = holoscan::make_application(false, holoscan::IOSpec::kPrecedingCount); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message received (count: 20, size: 1)") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + + // since PingRawNullPtrTxOp's output ports (out1, out2) are handled in std::unordered_map order, + // the order of output port creation is not deterministic, so the order of the received + // messages may be different. 
+ EXPECT_TRUE(log_output.find("Rx message value: N") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + + EXPECT_TRUE(log_output.find("Rx message value: 9") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingTensorMapDataMultiAnySize) { + using PingMultiPortTensorMapApp = PingMultiPortDataApp; + + auto app = + holoscan::make_application(true, holoscan::IOSpec::kAnySize); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message received (count: 10, size: 2)") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + + EXPECT_TRUE(log_output.find("Rx message values tensor_a (count: 10): [1, 2, 3]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [3, 2, 1]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_c (count: 10): [2, 3, 4]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [4, 3, 2]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingTensorMapDataMultiPrecedingCount) { + using PingMultiPortTensorMapApp = PingMultiPortDataApp; + + auto app = holoscan::make_application( + true, holoscan::IOSpec::kPrecedingCount); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message received (count: 
10, size: 2)") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + + EXPECT_TRUE(log_output.find("Rx message values tensor_a (count: 10): [1, 2, 3]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [3, 2, 1]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_c (count: 10): [2, 3, 4]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [4, 3, 2]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingTensorMapDataSinglePrecedingCount) { + using PingMultiPortTensorMapApp = PingMultiPortDataApp; + + auto app = holoscan::make_application( + true, holoscan::IOSpec::kPrecedingCount); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message received (count: 10, size: 2)") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + + EXPECT_TRUE(log_output.find("Rx message values tensor_a (count: 10): [1, 2, 3]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [3, 2, 1]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_c (count: 10): [2, 3, 4]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [4, 3, 2]") != + std::string::npos) + << "=== LOG ===\n" + << 
log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingEntityDataMultiAnySize) { + using PingMultiPortEntityApp = PingMultiPortDataApp; + + auto app = holoscan::make_application(true, holoscan::IOSpec::kAnySize); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message received (count: 10, size: 2)") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [3, 2, 1]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [4, 3, 2]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingEntityDataMultiPrecedingCount) { + using PingMultiPortEntityApp = PingMultiPortDataApp; + + auto app = + holoscan::make_application(true, holoscan::IOSpec::kPrecedingCount); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message received (count: 10, size: 2)") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [3, 2, 1]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [4, 3, 2]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingEntityDataSinglePrecedingCount) { + using PingMultiPortEntityApp = PingMultiPortDataApp; + + 
auto app = + holoscan::make_application(false, holoscan::IOSpec::kPrecedingCount); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [3, 2, 1]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Rx message values tensor_common (count: 10): [4, 3, 2]") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} + +TEST(MultiReceiverOperatorPingApp, TestSendingTensorMapDataMultiAnySizeIncorrectReceiver) { + using PingMultiPortTensorMapApp = PingMultiPortDataApp; + + auto app = + holoscan::make_application(true, holoscan::IOSpec::kAnySize); + + // capture output to check that the expected messages were logged + testing::internal::CaptureStderr(); + + app->run(); + + std::string log_output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE( + log_output.find( + "Unable to cast the received data to the specified type for input 'receivers:0'") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; +} diff --git a/tests/system/multithreaded_app.cpp b/tests/system/multithreaded_app.cpp index 4df8fc3..e03b2ee 100644 --- a/tests/system/multithreaded_app.cpp +++ b/tests/system/multithreaded_app.cpp @@ -21,11 +21,11 @@ #include #include "holoscan/holoscan.hpp" -#include +#include "holoscan/operators/bayer_demosaic/bayer_demosaic.hpp" +#include "holoscan/operators/ping_tensor_rx/ping_tensor_rx.hpp" +#include "holoscan/operators/ping_tensor_tx/ping_tensor_tx.hpp" #include "env_wrapper.hpp" -#include "ping_tensor_rx_op.hpp" -#include "ping_tensor_tx_op.hpp" constexpr int NUM_RX = 3; constexpr int NUM_ITER = 100; @@ -76,14 +76,15 @@ TEST(MultithreadedApp, TestSendingTensorToMultipleOperators) { app->run(); std::string log_output = 
testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("null data") == std::string::npos); + EXPECT_TRUE(log_output.find("null data") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; for (int i = 1; i < NUM_RX; ++i) { - EXPECT_TRUE(log_output.find(fmt::format("Rx message value - name:rx{}, data[0]:", i)) != - std::string::npos); + EXPECT_TRUE(log_output.find(fmt::format("rx{} received message", i)) != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } // Check that the last rx operator received the expected value and print the log if it didn't - EXPECT_TRUE(log_output.find(fmt::format("Rx message value - name:rx{}, data[0]:", NUM_RX)) != - std::string::npos) + EXPECT_TRUE(log_output.find(fmt::format("rx{} received message", NUM_RX)) != std::string::npos) << "=== LOG ===\n" << log_output << "\n===========\n"; } diff --git a/tests/system/native_async_operator_ping_app.cpp b/tests/system/native_async_operator_ping_app.cpp index adbcf4d..7b20e76 100644 --- a/tests/system/native_async_operator_ping_app.cpp +++ b/tests/system/native_async_operator_ping_app.cpp @@ -91,14 +91,22 @@ TEST_P(ParameterizedAsyncPingTestFixture, TestAsyncRxApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("Async ping rx thread entering") != std::string::npos); - EXPECT_TRUE(log_output.find("Async ping rx thread exiting") != std::string::npos); + EXPECT_TRUE(log_output.find("Async ping rx thread entering") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Async ping rx thread exiting") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; EXPECT_TRUE(log_output.find("Rx message value: 5") != std::string::npos) << "=== LOG ===\n" << log_output << "\n===========\n"; - EXPECT_TRUE(log_output.find("Async ping tx thread entering") == std::string::npos); - 
EXPECT_TRUE(log_output.find("Async ping tx thread exiting") == std::string::npos); + EXPECT_TRUE(log_output.find("Async ping tx thread entering") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Async ping tx thread exiting") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_P(ParameterizedAsyncPingTestFixture, TestAsyncTxApp) { @@ -116,14 +124,22 @@ TEST_P(ParameterizedAsyncPingTestFixture, TestAsyncTxApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("Async ping tx thread entering") != std::string::npos); - EXPECT_TRUE(log_output.find("Async ping tx thread exiting") != std::string::npos); + EXPECT_TRUE(log_output.find("Async ping tx thread entering") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Async ping tx thread exiting") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; EXPECT_TRUE(log_output.find("Rx message value: 5") != std::string::npos) << "=== LOG ===\n" << log_output << "\n===========\n"; - EXPECT_TRUE(log_output.find("Async ping rx thread entering") == std::string::npos); - EXPECT_TRUE(log_output.find("Async ping rx thread exiting") == std::string::npos); + EXPECT_TRUE(log_output.find("Async ping rx thread entering") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Async ping rx thread exiting") == std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST_P(ParameterizedAsyncPingTestFixture, TestAsyncTxRxApp) { @@ -141,10 +157,18 @@ TEST_P(ParameterizedAsyncPingTestFixture, TestAsyncTxRxApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("Async ping tx thread entering") != std::string::npos); - EXPECT_TRUE(log_output.find("Async ping rx thread 
entering") != std::string::npos); - EXPECT_TRUE(log_output.find("Async ping tx thread exiting") != std::string::npos); - EXPECT_TRUE(log_output.find("Async ping rx thread exiting") != std::string::npos); + EXPECT_TRUE(log_output.find("Async ping tx thread entering") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Async ping rx thread entering") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Async ping tx thread exiting") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Async ping rx thread exiting") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; EXPECT_TRUE(log_output.find("Rx message value: 5") != std::string::npos) << "=== LOG ===\n" << log_output << "\n===========\n"; diff --git a/tests/system/native_operator_minimal_app.cpp b/tests/system/native_operator_minimal_app.cpp index 0e91d6f..5a02eca 100644 --- a/tests/system/native_operator_minimal_app.cpp +++ b/tests/system/native_operator_minimal_app.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,6 +40,20 @@ class MinimalOp : public Operator { MinimalOp() = default; + void initialize() override { + HOLOSCAN_LOG_INFO("MinimalOp::initialize() - default value before Operator::initialize(): {}", + value_.default_value()); + if (value_.has_value()) { + HOLOSCAN_LOG_INFO("MinimalOp::initialize() - has value before Operator::initialize(): {}", + value_.get()); + } else { + HOLOSCAN_LOG_INFO("MinimalOp::initialize() - has no value before Operator::initialize()"); + } + Operator::initialize(); + HOLOSCAN_LOG_INFO("MinimalOp::initialize() - value after Operator::initialize(): {}", + value_.get()); + } + void setup(OperatorSpec& spec) override { spec.param(value_, "value", "value", "value stored by the operator", 2.5); } @@ -106,7 +120,26 @@ TEST(MinimalNativeOperatorApp, TestMinimalNativeOperatorApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("value: 5.3") != std::string::npos); + + EXPECT_TRUE( + log_output.find("MinimalOp::initialize() - has no value before Operator::initialize()") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find( + "MinimalOp::initialize() - default value before Operator::initialize(): 2.5") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE( + log_output.find("MinimalOp::initialize() - value after Operator::initialize(): 5.3") != + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + + EXPECT_TRUE(log_output.find("value: 5.3") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(MinimalNativeOperatorApp, TestMinimalNativeOperatorAppMultiThread) { @@ -134,13 +167,23 @@ TEST(MinimalNativeOperatorApp, TestMinimalNativeOperatorAppMultiThread) { app->run(); std::string log_output = 
testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("value: 5.3") != std::string::npos); + EXPECT_TRUE(log_output.find("value: 5.3") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // check that the expected parameters were sent onto GXF - EXPECT_TRUE(log_output.find("setting GXF parameter 'worker_thread_number'") != std::string::npos); - EXPECT_TRUE(log_output.find("setting GXF parameter 'stop_on_deadlock'") != std::string::npos); + EXPECT_TRUE(log_output.find("setting GXF parameter 'worker_thread_number'") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("setting GXF parameter 'stop_on_deadlock'") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; EXPECT_TRUE(log_output.find("setting GXF parameter 'check_recession_period_ms'") != - std::string::npos); - EXPECT_TRUE(log_output.find("setting GXF parameter 'max_duration_ms'") != std::string::npos); + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("setting GXF parameter 'max_duration_ms'") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; // restore the original environment variable if (env_orig) { @@ -198,7 +241,9 @@ TEST(MinimalNativeOperatorApp, TestComplexValueApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("value: 5.3+2j") != std::string::npos); + EXPECT_TRUE(log_output.find("value: 5.3+2j") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(MinimalNativeOperatorApp, TestComplexValueAppDefault) { @@ -211,7 +256,9 @@ TEST(MinimalNativeOperatorApp, TestComplexValueAppDefault) { // did not provide a config file, so the default value will have been used std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("value: 2.5-3j") != 
std::string::npos); + EXPECT_TRUE(log_output.find("value: 2.5-3j") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/system/native_operator_multibroadcasts_app.cpp b/tests/system/native_operator_multibroadcasts_app.cpp index c91daf6..271cec2 100644 --- a/tests/system/native_operator_multibroadcasts_app.cpp +++ b/tests/system/native_operator_multibroadcasts_app.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -73,7 +73,8 @@ TEST(NativeOperatorMultiBroadcastsApp, TestNativeOperatorMultiBroadcastsApp) { count++; pos = log_output.find(recv_string, pos + recv_string.size()); } - EXPECT_EQ(count, 4); + EXPECT_EQ(count, 4) << "expected count of 4, found " << count << "\n=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/system/native_operator_ping_app.cpp b/tests/system/native_operator_ping_app.cpp index 788dff8..33ef646 100644 --- a/tests/system/native_operator_ping_app.cpp +++ b/tests/system/native_operator_ping_app.cpp @@ -172,8 +172,11 @@ TEST(NativeOperatorPingApp, TestNativeOperatorPingApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("value1: 1") != std::string::npos); - EXPECT_TRUE(log_output.find("value2: 100") != std::string::npos); + EXPECT_TRUE(log_output.find("value1: 1") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("value2: 100") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(NativeOperatorPingApp, TestNativeOperatorForwardApp) { @@ -188,8 +191,11 @@ TEST(NativeOperatorPingApp, 
TestNativeOperatorForwardApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("value1: 1") != std::string::npos); - EXPECT_TRUE(log_output.find("value2: 100") != std::string::npos); + EXPECT_TRUE(log_output.find("value1: 1") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("value2: 100") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(NativeOperatorPingApp, TestNativeForwardOpAppDanglingOutput) { @@ -207,8 +213,12 @@ TEST(NativeOperatorPingApp, TestNativeForwardOpAppDanglingOutput) { // string tested here is from GXF itself, so may have to update it as GXF is updated EXPECT_TRUE(log_output.find( "No receiver connected to transmitter of DownstreamReceptiveSchedulingTerm") != - std::string::npos); - EXPECT_TRUE(log_output.find("The entity will never tick") != std::string::npos); + std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("The entity will never tick") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(NativeOperatorPingApp, TestNativeForwardOpAppDanglingInput) { @@ -225,7 +235,8 @@ TEST(NativeOperatorPingApp, TestNativeForwardOpAppDanglingInput) { std::string log_output = testing::internal::GetCapturedStderr(); // No error will be logged in the dangling input case, but we can test that the // app will deadlock so that value1 is not printed - EXPECT_TRUE(log_output.find("value1: ") == std::string::npos); + EXPECT_TRUE(log_output.find("value1: ") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/system/operator_metadata_apps.cpp b/tests/system/operator_metadata_apps.cpp index 9e47a07..fdeb2fa 100644 --- a/tests/system/operator_metadata_apps.cpp +++ b/tests/system/operator_metadata_apps.cpp @@ -434,11 +434,21 @@ TEST(OperatorMetadataApps, 
TestOperatorMetadataMergeApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("rx metadata has 3 keys") != std::string::npos); - EXPECT_TRUE(log_output.find("tx label: my title") != std::string::npos); - EXPECT_TRUE(log_output.find("fwd date: 2024-07-16") != std::string::npos); - EXPECT_TRUE(log_output.find(fmt::format("fwd2 value: {}", count)) != std::string::npos); - EXPECT_TRUE(log_output.find("value1 == value2") != std::string::npos); + EXPECT_TRUE(log_output.find("rx metadata has 3 keys") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("tx label: my title") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("fwd date: 2024-07-16") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find(fmt::format("fwd2 value: {}", count)) != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("value1 == value2") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(OperatorMetadataApps, TestOperatorMetadataMergeAppPolicyRaise) { @@ -454,8 +464,11 @@ TEST(OperatorMetadataApps, TestOperatorMetadataMergeAppPolicyRaise) { EXPECT_THROW(app->run(), std::runtime_error); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") != std::string::npos); - EXPECT_TRUE(log_output.find("Key 'PingTxMetadataOp.label' already exists") != std::string::npos); + EXPECT_TRUE(log_output.find("error") != std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("Key 'PingTxMetadataOp.label' already exists") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(OperatorMetadataApps, TestOperatorMetadataMergeAppTrackingDisabled) { @@ -470,8 +483,12 @@ 
TEST(OperatorMetadataApps, TestOperatorMetadataMergeAppTrackingDisabled) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("rx metadata has 0 keys") != std::string::npos); - EXPECT_TRUE(log_output.find("value1 == value2") != std::string::npos); + EXPECT_TRUE(log_output.find("rx metadata has 0 keys") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("value1 == value2") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } // This app verifies that metadata passes through GXF Codelet-based operators as expected @@ -487,10 +504,18 @@ TEST(OperatorMetadataApps, TestGXFOperatorMetadataBroadcastApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("rx1 metadata has 1 keys") != std::string::npos); - EXPECT_TRUE(log_output.find("rx2 metadata has 1 keys") != std::string::npos); - EXPECT_TRUE(log_output.find("rx3 metadata has 1 keys") != std::string::npos); - EXPECT_TRUE(log_output.find("rx4 metadata has 1 keys") != std::string::npos); + EXPECT_TRUE(log_output.find("rx1 metadata has 1 keys") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("rx2 metadata has 1 keys") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("rx3 metadata has 1 keys") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("rx4 metadata has 1 keys") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } // This app verifies that metadata passes through GXF Codelet-based operators as expected @@ -506,8 +531,12 @@ TEST(OperatorMetadataApps, TestGXFOperatorMetadataApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("rx metadata has 1 
keys") != std::string::npos); - EXPECT_TRUE(log_output.find("tx label: my title") != std::string::npos); + EXPECT_TRUE(log_output.find("rx metadata has 1 keys") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; + EXPECT_TRUE(log_output.find("tx label: my title") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } TEST(OperatorMetadataApps, TestGXFOperatorMetadataAppTrackingDisabled) { @@ -522,7 +551,9 @@ TEST(OperatorMetadataApps, TestGXFOperatorMetadataAppTrackingDisabled) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("rx metadata has 0 keys") != std::string::npos); + EXPECT_TRUE(log_output.find("rx metadata has 0 keys") != std::string::npos) + << "=== LOG ===\n" + << log_output << "\n===========\n"; } // This app tests case with many objects in the metadata dictionary @@ -539,7 +570,8 @@ TEST(OperatorMetadataApps, TestOperatorLargeMetadataApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } // This app tests sending metadata in an entity alongside a TensorMap object @@ -555,7 +587,8 @@ TEST(OperatorMetadataApps, TestTensorMapOperatorsMetadataApp) { app->run(); std::string log_output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(log_output.find("error") == std::string::npos); + EXPECT_TRUE(log_output.find("error") == std::string::npos) << "=== LOG ===\n" + << log_output << "\n===========\n"; } } // namespace holoscan diff --git a/tests/system/ping_tensor_tx_op.cpp b/tests/system/ping_tensor_tx_op.cpp deleted file mode 100644 index 54daadb..0000000 --- a/tests/system/ping_tensor_tx_op.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ping_tensor_tx_op.hpp" - -#include - -#include -#include -#include -#include - -#include - -#define CUDA_TRY(stmt) \ - ({ \ - cudaError_t _holoscan_cuda_err = stmt; \ - if (cudaSuccess != _holoscan_cuda_err) { \ - HOLOSCAN_LOG_ERROR("CUDA Runtime call {} in line {} of file {} failed with '{}' ({}).", \ - #stmt, \ - __LINE__, \ - __FILE__, \ - cudaGetErrorString(_holoscan_cuda_err), \ - _holoscan_cuda_err); \ - } \ - _holoscan_cuda_err; \ - }) - -namespace holoscan { -namespace ops { - -void PingTensorTxOp::initialize() { - // Set up prerequisite parameters before calling Operator::initialize() - auto frag = fragment(); - - // Find if there is an argument for 'allocator' - auto has_allocator = std::find_if( - args().begin(), args().end(), [](const auto& arg) { return (arg.name() == "allocator"); }); - // Create the allocator if there is no argument provided. 
- if (has_allocator == args().end()) { - allocator_ = frag->make_resource("allocator"); - add_arg(allocator_.get()); - } - Operator::initialize(); -} - -void PingTensorTxOp::setup(OperatorSpec& spec) { - spec.output("out"); - - spec.param(allocator_, "allocator", "Allocator", "Allocator used to allocate tensor output."); - spec.param(storage_type_, - "storage_type", - "memory storage type", - "nvidia::gxf::MemoryStorageType enum indicating where the memory will be stored", - std::string("system")); - spec.param(batch_size_, - "batch_size", - "batch size", - "Size of the batch dimension (default: 0). The tensor shape will be " - "([batch], rows, [columns], [channels]) where [] around a dimension indicates that " - "it is only present if the corresponding parameter has a size > 0." - "If 0, no batch dimension will be present.", - static_cast(0)); - spec.param(rows_, - "rows", - "number of rows", - "Number of rows (default: 32), must be >= 1.", - static_cast(32)); - spec.param(columns_, - "columns", - "number of columns", - "Number of columns (default: 64). If 0, no column dimension will be present.", - static_cast(64)); - spec.param( - channels_, - "channels", - "channels", - "Number of channels (default: 0). If 0, no channel dimension will be present. 
(default: 0)", - static_cast(0)); - spec.param(data_type_, - "data_type", - "data type for the tensor elements", - "must be one of {'int8_t', 'int16_t', 'int32_t', 'int64_t', 'uint8_t', 'uint16_t'," - "'uint32_t', 'uint64_t', 'float', 'double', 'complex', 'complex'}", - std::string{"uint8_t"}); - spec.param(tensor_name_, - "tensor_name", - "output tensor name", - "output tensor name (default: tensor)", - std::string{"tensor"}); -} - -nvidia::gxf::PrimitiveType PingTensorTxOp::primitive_type(const std::string& data_type) { - HOLOSCAN_LOG_INFO("PingTensorTxOp data type = {}", data_type); - if (data_type == "int8_t") { - return nvidia::gxf::PrimitiveType::kInt8; - } else if (data_type == "int16_t") { - return nvidia::gxf::PrimitiveType::kInt16; - } else if (data_type == "int32_t") { - return nvidia::gxf::PrimitiveType::kInt32; - } else if (data_type == "int64_t") { - return nvidia::gxf::PrimitiveType::kInt64; - } else if (data_type == "uint8_t") { - return nvidia::gxf::PrimitiveType::kUnsigned8; - } else if (data_type == "uint16_t") { - return nvidia::gxf::PrimitiveType::kUnsigned16; - } else if (data_type == "uint32_t") { - return nvidia::gxf::PrimitiveType::kUnsigned32; - } else if (data_type == "uint64_t") { - return nvidia::gxf::PrimitiveType::kUnsigned64; - } else if (data_type == "float") { - return nvidia::gxf::PrimitiveType::kFloat32; - } else if (data_type == "double") { - return nvidia::gxf::PrimitiveType::kFloat64; - } else if (data_type == "complex") { - return nvidia::gxf::PrimitiveType::kComplex64; - } else if (data_type == "complex") { - return nvidia::gxf::PrimitiveType::kComplex128; - } - throw std::runtime_error(std::string("Unrecognized data_type: ") + data_type); -} - -void PingTensorTxOp::compute(InputContext&, OutputContext& op_output, ExecutionContext& context) { - // the type of out_message is TensorMap - TensorMap out_message; - - auto gxf_context = context.context(); - auto frag = fragment(); - - // get Handle to underlying 
nvidia::gxf::Allocator from std::shared_ptr - auto allocator = - nvidia::gxf::Handle::Create(gxf_context, allocator_->gxf_cid()); - - auto gxf_tensor = std::make_shared(); - - // Define the dimensions for the CUDA memory (64 x 32, uint8). - int batch_size = batch_size_.get(); - int rows = rows_.get(); - int columns = columns_.get(); - int channels = channels_.get(); - auto dtype = element_type(); - - std::vector shape_vec; - if (batch_size > 0) { shape_vec.push_back(batch_size); } - shape_vec.push_back(rows); - if (columns > 0) { shape_vec.push_back(columns); } - if (channels > 0) { shape_vec.push_back(channels); } - auto tensor_shape = nvidia::gxf::Shape{shape_vec}; - - const uint64_t bytes_per_element = nvidia::gxf::PrimitiveTypeSize(dtype); - auto strides = nvidia::gxf::ComputeTrivialStrides(tensor_shape, bytes_per_element); - nvidia::gxf::MemoryStorageType storage_type; - auto storage_name = storage_type_.get(); - HOLOSCAN_LOG_DEBUG("storage_name = {}", storage_name); - if (storage_name == std::string("device")) { - storage_type = nvidia::gxf::MemoryStorageType::kDevice; - } else if (storage_name == std::string("host")) { - storage_type = nvidia::gxf::MemoryStorageType::kHost; - } else if (storage_name == std::string("system")) { - storage_type = nvidia::gxf::MemoryStorageType::kSystem; - } else { - throw std::runtime_error(fmt::format( - "Unrecognized storage_device ({}), should be one of {'device', 'host', 'system'}", - storage_name)); - } - - // allocate a tensor of the specified shape and data type - auto result = gxf_tensor->reshapeCustom( - tensor_shape, dtype, bytes_per_element, strides, storage_type, allocator.value()); - if (!result) { HOLOSCAN_LOG_ERROR("failed to generate tensor"); } - - // Create Holoscan tensor - auto maybe_dl_ctx = (*gxf_tensor).toDLManagedTensorContext(); - if (!maybe_dl_ctx) { - HOLOSCAN_LOG_ERROR( - "failed to get std::shared_ptr from nvidia::gxf::Tensor"); - } - std::shared_ptr holoscan_tensor = 
std::make_shared(maybe_dl_ctx.value()); - - // insert tensor into the TensorMap - out_message.insert({tensor_name_.get().c_str(), holoscan_tensor}); - - op_output.emit(out_message); -} - -} // namespace ops -} // namespace holoscan diff --git a/tests/system/ping_tensor_tx_op.hpp b/tests/system/ping_tensor_tx_op.hpp deleted file mode 100644 index eabbd69..0000000 --- a/tests/system/ping_tensor_tx_op.hpp +++ /dev/null @@ -1,62 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef SYSTEM_PING_TENSOR_TX_OP_HPP -#define SYSTEM_PING_TENSOR_TX_OP_HPP - -#include -#include - -#include - -namespace holoscan { -namespace ops { - -class PingTensorTxOp : public holoscan::Operator { - public: - HOLOSCAN_OPERATOR_FORWARD_ARGS(PingTensorTxOp) - - PingTensorTxOp() = default; - - void initialize() override; - void setup(OperatorSpec& spec) override; - void compute(InputContext&, OutputContext& op_output, ExecutionContext& context) override; - - nvidia::gxf::PrimitiveType element_type() { - if (element_type_.has_value()) { return element_type_.value(); } - element_type_ = primitive_type(data_type_.get()); - return element_type_.value(); - } - - private: - nvidia::gxf::PrimitiveType primitive_type(const std::string& data_type); - std::optional element_type_; - - Parameter> allocator_; - Parameter storage_type_; - Parameter batch_size_; - Parameter rows_; - Parameter columns_; - Parameter channels_; - Parameter data_type_; - Parameter tensor_name_; -}; - -} // namespace ops -} // namespace holoscan - -#endif /* SYSTEM_PING_TENSOR_TX_OP_HPP */ diff --git a/tests/system/tensor_compare_op.cpp b/tests/system/tensor_compare_op.cpp index 1cfb5e5..29d1470 100644 --- a/tests/system/tensor_compare_op.cpp +++ b/tests/system/tensor_compare_op.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,20 +22,7 @@ #include #include #include - -#define CUDA_TRY(stmt) \ - ({ \ - cudaError_t _holoscan_cuda_err = stmt; \ - if (cudaSuccess != _holoscan_cuda_err) { \ - HOLOSCAN_LOG_ERROR("CUDA Runtime call {} in line {} of file {} failed with '{}' ({}).", \ - #stmt, \ - __LINE__, \ - __FILE__, \ - cudaGetErrorString(_holoscan_cuda_err), \ - _holoscan_cuda_err); \ - } \ - _holoscan_cuda_err; \ - }) +#include "holoscan/utils/cuda_macros.hpp" namespace holoscan { namespace ops { @@ -67,10 +54,12 @@ void TensorCompareOp::compute(InputContext& op_input, OutputContext&, ExecutionC } std::vector data1(tensor1->nbytes()); - CUDA_TRY(cudaMemcpy(data1.data(), tensor1->data(), tensor1->nbytes(), cudaMemcpyDeviceToHost)); + HOLOSCAN_CUDA_CALL( + cudaMemcpy(data1.data(), tensor1->data(), tensor1->nbytes(), cudaMemcpyDeviceToHost)); std::vector data2(tensor2->nbytes()); - CUDA_TRY(cudaMemcpy(data2.data(), tensor2->data(), tensor2->nbytes(), cudaMemcpyDeviceToHost)); + HOLOSCAN_CUDA_CALL( + cudaMemcpy(data2.data(), tensor2->data(), tensor2->nbytes(), cudaMemcpyDeviceToHost)); auto result = std::mismatch(data1.begin(), data1.end(), data2.begin()); if (result.first != data1.end()) { diff --git a/tests/system/video_stream_replayer_op.cpp b/tests/system/video_stream_replayer_op.cpp new file mode 100644 index 0000000..c68701c --- /dev/null +++ b/tests/system/video_stream_replayer_op.cpp @@ -0,0 +1,118 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <cstdlib>
+#include <filesystem>
+#include <string>
+
+#include "../config.hpp"
+#include "tensor_compare_op.hpp"
+
+#include <holoscan/holoscan.hpp>
+#include <holoscan/operators/holoviz/holoviz.hpp>
+#include <holoscan/operators/video_stream_replayer/video_stream_replayer.hpp>
+
+using namespace holoscan;
+
+static HoloscanTestConfig test_config;
+
+class VideoReplayerApp : public holoscan::Application {
+ public:
+  void compose() override {
+    using namespace holoscan;
+
+    // Sets the data directory to use from the environment variable if it is set
+    ArgList args;
+    auto data_directory = std::getenv("HOLOSCAN_INPUT_PATH");
+    if (data_directory != nullptr && data_directory[0] != '\0') {
+      auto video_directory = std::filesystem::path(data_directory);
+      video_directory /= "racerx";
+      args.add(Arg("directory", video_directory.string()));
+    }
+
+    if (use_allocator_args_ || use_entity_serializer_arg_) {
+      if (use_entity_serializer_arg_) {
+        auto entity_serializer =
+            make_resource<holoscan::StdEntitySerializer>("video_entity_serializer");
+        args.add(Arg("entity_serializer", entity_serializer));
+      } else {
+        // the video data has a header that indicates device memory
+        args.add(Arg("allocator", make_resource<UnboundedAllocator>("video_replayer_allocator")));
+      }
+    }
+
+    // Define the replayer and holoviz operators and configure using yaml configuration
+    auto replayer =
+        make_operator<ops::VideoStreamReplayerOp>("replayer", from_config("replayer"), args);
+    auto visualizer = make_operator<ops::HolovizOp>("holoviz", from_config("holoviz"));
+
+    // Define the workflow: replayer -> holoviz
+    add_flow(replayer, visualizer, {{"output", "receivers"}});
+  }
+
+  void set_use_entity_serializer(bool use_entity_serializer_arg) {
+    use_entity_serializer_arg_ = use_entity_serializer_arg;
+  }
+  void set_use_allocators(bool use_allocator_args) { use_allocator_args_ = use_allocator_args; }
+
+ private:
+  bool use_entity_serializer_arg_ = false;
+  bool use_allocator_args_ = false;
+};
+
+void run_app(bool use_allocator_args = false, bool use_entity_serializer_arg = false,
+             const std::string& failure_str = "") {
+  auto app = make_application<VideoReplayerApp>();
+
+  const std::string config_file = test_config.get_test_data_file("video_replayer_apps.yaml");
+  app->config(config_file);
+
+  app->set_use_allocators(use_allocator_args);
+  app->set_use_entity_serializer(use_entity_serializer_arg);
+
+  // capture output to check that the expected messages were logged
+  testing::internal::CaptureStderr();
+  try {
+    app->run();
+  } catch (const std::exception& ex) {
+    GTEST_FATAL_FAILURE_(
+        fmt::format("{}{}", testing::internal::GetCapturedStderr(), ex.what()).c_str());
+  }
+  std::string log_output = testing::internal::GetCapturedStderr();
+  if (failure_str.empty()) {
+    EXPECT_TRUE(log_output.find("error") == std::string::npos) << log_output;
+  } else {
+    EXPECT_TRUE(log_output.find(failure_str) != std::string::npos) << log_output;
+  }
+}
+
+// run app without providing entity_serializer or allocator argument
+TEST(VideoStreamReplayerApps, TestDefaultEntitySerializer) {
+  run_app(false, false);
+}
+
+// run app providing allocator argument
+TEST(VideoStreamReplayerApps, TestUserProvidedAllocator) {
+  run_app(true, false);
+}
+
+// run app providing entity_serializer argument
+TEST(VideoStreamReplayerApps, TestUserProvidedEntitySerializer) {
+  run_app(false, true);
+}