diff --git a/CMakeLists.txt b/CMakeLists.txt index 95ca994..3955a6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}") # Version information #-------------------------------------------------- set(ONNX2TRT_MAJOR 10) -set(ONNX2TRT_MINOR 3) +set(ONNX2TRT_MINOR 4) set(ONNX2TRT_PATCH 0) set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version") diff --git a/ImporterContext.cpp b/ImporterContext.cpp index 7f74757..fdd3dbd 100644 --- a/ImporterContext.cpp +++ b/ImporterContext.cpp @@ -134,7 +134,10 @@ void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const& mConstantLayers.insert({uniqueName, static_cast(layer)}); } } - if (node != nullptr && layer != nullptr) + // Set metadata only if the layer is associated with an ONNX node. + // Skip constant layers because constants are represented as initializers in ONNX and should not be associated + // with any ONNX node. + if (node != nullptr && layer != nullptr && layer->getType() != nvinfer1::LayerType::kCONSTANT) { processMetadata(this, *node, layer); } diff --git a/ModelImporter.cpp b/ModelImporter.cpp index 48c7a29..39b2505 100644 --- a/ModelImporter.cpp +++ b/ModelImporter.cpp @@ -439,6 +439,17 @@ Status importLocalFunctions(ImporterContext* ctx, ::ONNX_NAMESPACE::ModelProto c return Status::success(); } +// Internal helper function used for ONNXRT-TRT EP to filter out DDS nodes +bool isDDSOp(char const* op_name) +{ + auto is = [op_name](char const* name) { return std::strcmp(op_name, name) == 0; }; + if (is("NonMaxSuppression") || is("NonZero") || is("RoiAlign")) + { + return true; + } + return false; +} + std::pair ModelImporter::doSupportsModel( void const* serialized_onnx_model, size_t serialized_onnx_model_size, char const* model_path) { @@ -514,9 +525,10 @@ std::pair ModelImporter::doSupport // 1. It is not a node that requires DDS // 2. It is not directly connected to an unsupported input // 3. The importer function did not throw an assertion + bool unsupportedDDS = isDDSOp(node.op_type().c_str()); bool unsupportedInput = (input_node.empty()) ? 
false : checkForInput(node); bool unsuccessfulParse = node_idx == error_node; - if (!unsupportedInput && !unsuccessfulParse) + if (!unsupportedDDS && !unsupportedInput && !unsuccessfulParse) { if (newSubGraph) { diff --git a/ModelRefitter.cpp b/ModelRefitter.cpp index 4416569..a053786 100644 --- a/ModelRefitter.cpp +++ b/ModelRefitter.cpp @@ -20,7 +20,7 @@ namespace onnx2trt { namespace { -Status deserializeOnnxModelFile(char const* onnxModelFile, ::ONNX_NAMESPACE::ModelProto& onnx_model) +void deserializeOnnxModelFile(char const* onnxModelFile, ::ONNX_NAMESPACE::ModelProto& onnx_model) { // Define S_ISREG macro for Windows #if !defined(S_ISREG) @@ -28,15 +28,14 @@ Status deserializeOnnxModelFile(char const* onnxModelFile, ::ONNX_NAMESPACE::Mod #endif struct stat sb; - ASSERT(!(stat(onnxModelFile, &sb) == 0 && !S_ISREG(sb.st_mode)) - && "Failed to parse the ONNX model; input is not a regular file.", - ErrorCode::kMODEL_DESERIALIZE_FAILED); + ONNXTRT_CHECK(!(stat(onnxModelFile, &sb) == 0 && !S_ISREG(sb.st_mode)), + MAKE_ERROR( + "Failed to parse the ONNX model; input is not a regular file.", ErrorCode::kMODEL_DESERIALIZE_FAILED)); GOOGLE_PROTOBUF_VERIFY_VERSION; bool const fileLoadSuccess = ParseFromFileAsBinary(&onnx_model, onnxModelFile); - ASSERT(fileLoadSuccess && "Failed to parse the ONNX model!", ErrorCode::kMODEL_DESERIALIZE_FAILED); - return Status::success(); + ONNXTRT_CHECK(fileLoadSuccess, MAKE_ERROR("Failed to parse the ONNX model!", ErrorCode::kMODEL_DESERIALIZE_FAILED)); } } // anonymous namespace @@ -49,7 +48,7 @@ std::unordered_set ModelRefitter::getRefittableWeights() } template -ValueOrStatus ModelRefitter::batchnormWeightRefitter( +size_t ModelRefitter::batchnormWeightRefitter( ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs, TConvertFunc&& f) { auto const& scale = inputs.at(0); @@ -88,7 +87,8 @@ ValueOrStatus ModelRefitter::batchnormWeightRefitter( // Validate that all the weights have the same amount of values bool allSame = scale.count() == bias.count() && mean.count() == scale.count() && variance.count() == scale.count() && combinedScale.count() == scale.count() && combinedBias.count() == scale.count(); - ASSERT(allSame && "Inputs to BatchNormalization must have the same shape!", ErrorCode::kREFIT_FAILED); + ONNXTRT_CHECK( + allSame, MAKE_ERROR("Inputs to BatchNormalization must have the same shape!", ErrorCode::kREFIT_FAILED)); for (int32_t i = 0; i < nbChannels; ++i) { @@ -99,16 +99,15 @@ ValueOrStatus ModelRefitter::batchnormWeightRefitter( if (refittableWeights.count(combinedScale.name)) { refittableWeights.erase(combinedScale.name); - ASSERT( - mRefitter->setNamedWeights(combinedScale.name, std::move(combinedScale)) && "Failed to set named weights", - ErrorCode::kREFIT_FAILED); + ONNXTRT_CHECK(mRefitter->setNamedWeights(combinedScale.name, std::move(combinedScale)), + MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); ++successfullyRefittedWeights; } if (refittableWeights.count(combinedBias.name)) { refittableWeights.erase(combinedBias.name); - ASSERT(mRefitter->setNamedWeights(combinedBias.name, std::move(combinedBias)) && "Failed to set named weights", - ErrorCode::kREFIT_FAILED); + ONNXTRT_CHECK(mRefitter->setNamedWeights(combinedBias.name, std::move(combinedBias)), + MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); ++successfullyRefittedWeights; } return successfullyRefittedWeights; @@ -125,18 +124,17 @@ class QuickCast }; }; -Status ModelRefitter::refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& 
onnx_model) +void ModelRefitter::refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& onnx_model) { nestedDepth = 0; successfullyRefittedWeights = 0; size_t const numberOfWeightsToRefit = refittableWeights.size(); - CHECK_STATUS(refitOnnxGraph(onnx_model.graph())); - ASSERT(successfullyRefittedWeights == numberOfWeightsToRefit && "Failed to refit all the weights.", - ErrorCode::kREFIT_FAILED); - return Status::success(); + refitOnnxGraph(onnx_model.graph()); + ONNXTRT_CHECK(successfullyRefittedWeights == numberOfWeightsToRefit, + MAKE_ERROR("Failed to refit all the weights.", ErrorCode::kREFIT_FAILED)); } -Status ModelRefitter::refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph) +void ModelRefitter::refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph) { for (::ONNX_NAMESPACE::TensorProto const& initializer : graph.initializer()) { @@ -159,66 +157,62 @@ Status ModelRefitter::refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph) refittedWeights.insert(initializer.name()); } ShapedWeights weights; - ASSERT(mWeightsContext.convertOnnxWeights(initializer, &weights, /*ownAllWeights=*/true) - && "Failed to import initializer.", - ErrorCode::kUNSUPPORTED_NODE); - ASSERT( - mRefitter->setNamedWeights(initializer.name().c_str(), std::move(weights)) && "Failed to set named weights", - ErrorCode::kREFIT_FAILED); + ONNXTRT_CHECK(mWeightsContext.convertOnnxWeights(initializer, &weights, /*ownAllWeights=*/true), + MAKE_ERROR("Failed to import initializer.", ErrorCode::kUNSUPPORTED_NODE)); + ONNXTRT_CHECK(mRefitter->setNamedWeights(initializer.name().c_str(), std::move(weights)), + MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); ++successfullyRefittedWeights; } std::vector topoOrder; - ASSERT(toposort(graph.node(), &topoOrder) && "Failed to sort the model topologically.", ErrorCode::kINVALID_GRAPH); + ONNXTRT_CHECK(toposort(graph.node(), &topoOrder), + MAKE_ERROR("Failed to sort the model topologically.", ErrorCode::kINVALID_GRAPH)); for (auto const& nodeIdx : topoOrder) { ::ONNX_NAMESPACE::NodeProto const& node = graph.node(nodeIdx); - CHECK_STATUS(refitOnnxNode(node, graph)); + refitOnnxNode(node, graph); } - return Status::success(); } -Status ModelRefitter::refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph) +void ModelRefitter::refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph) { // For nodes that contain subgraphs (Ifs, Loops, Scans), // ensure that the recursion depth is limited to a set amount. 
++nestedDepth; static size_t const MAX_NESTED_SUBGRAPHS = 24; - ASSERT((nestedDepth <= MAX_NESTED_SUBGRAPHS) - && "ONNX graph contains nested structures that exceed the maximum allowed by TensorRT!", - ErrorCode::kUNSUPPORTED_GRAPH); + ONNXTRT_CHECK((nestedDepth <= MAX_NESTED_SUBGRAPHS), + MAKE_ERROR("ONNX graph contains nested structures that exceed the maximum allowed by TensorRT!", + ErrorCode::kUNSUPPORTED_GRAPH)); - Status status{ErrorCode::kSUCCESS}; if (node.op_type() == "Constant") { - status = refitOnnxConstantNode(node, graph.name()); + refitOnnxConstantNode(node, graph.name()); } else if (node.op_type() == "BatchNormalization") { - status = refitOnnxBatchNormNode(node, graph); + refitOnnxBatchNormNode(node, graph); } else if (node.op_type() == "If") { - status = refitOnnxIfNode(node); + refitOnnxIfNode(node); } else if (node.op_type() == "Loop") { - status = refitOnnxLoopNode(node); + refitOnnxLoopNode(node); } else if (node.op_type() == "Scan") { - status = refitOnnxScanNode(node); + refitOnnxScanNode(node); } --nestedDepth; - return status; } -Status ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName) +void ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName) { if (!refittableWeights.count(node.output(0))) { - return Status::success(); + return; } refittableWeights.erase(node.output(0)); if (refittedWeights.count(node.output(0))) @@ -237,7 +231,8 @@ Status ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& n { weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {}}); float value = nodeAttribute.f(); - ASSERT(weights.count() == 1 && "Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK( + weights.count() == 1, MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); std::memcpy(weights.values, &value, sizeof(float)); } else if (nodeAttribute.name() == "value_floats") @@ -245,14 +240,16 @@ Status ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& n std::vector values{nodeAttribute.floats().begin(), nodeAttribute.floats().end()}; int64_t valueSize = values.size(); weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::FLOAT, {1, {valueSize}}); - ASSERT(weights.count() == values.size() && "Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(weights.count() == values.size(), + MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); std::memcpy(weights.values, values.data(), weights.count() * sizeof(float)); } else if (nodeAttribute.name() == "value_int") { weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::INT64, {0, {}}); int64_t value = nodeAttribute.i(); - ASSERT(weights.count() == 1 && "Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK( + weights.count() == 1, MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); std::memcpy(weights.values, &value, sizeof(int64_t)); } else if (nodeAttribute.name() == "value_ints") @@ -260,25 +257,26 @@ Status ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& n std::vector values{nodeAttribute.ints().begin(), nodeAttribute.ints().end()}; int64_t valueSize = values.size(); weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::INT64, {1, {valueSize}}); - ASSERT(weights.count() == values.size() && "Failed to import Constant 
node.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(weights.count() == values.size(), + MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); std::memcpy(weights.values, values.data(), weights.count() * sizeof(int64_t)); } else { ::ONNX_NAMESPACE::TensorProto const& onnx_weights_tensor = nodeAttribute.t(); - ASSERT(mWeightsContext.convertOnnxWeights(onnx_weights_tensor, &weights) && "Failed to import Constant node.", - ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(mWeightsContext.convertOnnxWeights(onnx_weights_tensor, &weights), + MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); } - ASSERT(mRefitter->setNamedWeights(node.output(0).c_str(), std::move(weights)) && "Failed to set named weights", - ErrorCode::kREFIT_FAILED); + ONNXTRT_CHECK(mRefitter->setNamedWeights(node.output(0).c_str(), std::move(weights)), + MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); ++successfullyRefittedWeights; - return Status::success(); } -Status ModelRefitter::refitOnnxBatchNormNode( +void ModelRefitter::refitOnnxBatchNormNode( ::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph) { - ASSERT(node.input().size() == 5 && "BatchNorm node does not have five required inputs.", ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK(node.input().size() == 5, + MAKE_ERROR("BatchNorm node does not have five required inputs.", ErrorCode::kINVALID_NODE)); std::vector batchNormInputs; // The following looping construct is due to the fact that some tensors // might be shared among the BatchNorm's inputs @@ -290,8 +288,8 @@ Status ModelRefitter::refitOnnxBatchNormNode( if (inputNames.at(inputIdx) == initializer.name()) { ShapedWeights weights; - ASSERT(mWeightsContext.convertOnnxWeights(initializer, &weights) && "Failed to import initializer.", - ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(mWeightsContext.convertOnnxWeights(initializer, &weights), + MAKE_ERROR("Failed to import initializer.", ErrorCode::kUNSUPPORTED_NODE)); weights.name = initializer.name().c_str(); batchNormInputs.push_back(std::move(weights)); break; @@ -304,9 +302,9 @@ Status ModelRefitter::refitOnnxBatchNormNode( // we must have already refitted the weights directly in refitOnnxGraph() if (batchNormInputs.size() < 4) { - return Status::success(); + return; } - ValueOrStatus batchnormRefittedWeights{0}; + size_t batchnormRefittedWeights{0}; auto const scaleType = batchNormInputs.at(0).type; bool const typesEqual = scaleType == batchNormInputs.at(1).type && scaleType == batchNormInputs.at(2).type && scaleType == batchNormInputs.at(3).type; @@ -314,34 +312,21 @@ Status ModelRefitter::refitOnnxBatchNormNode( { batchnormRefittedWeights = batchnormWeightRefitter(node, batchNormInputs, QuickCast()); - if (batchnormRefittedWeights.is_error()) - { - return batchnormRefittedWeights.error(); - } } else if (typesEqual && scaleType == ::ONNX_NAMESPACE::TensorProto::BFLOAT16) { batchnormRefittedWeights = batchnormWeightRefitter(node, batchNormInputs, QuickCast()); - if (batchnormRefittedWeights.is_error()) - { - return batchnormRefittedWeights.error(); - } } else { // Do calculations in FP32, possibly promoting/demoting arithmetic types of some operands. 
batchnormRefittedWeights = batchnormWeightRefitter( node, batchNormInputs, [this](ShapedWeights const& w) { return mWeightsContext.getFP32Values(w); }); - if (batchnormRefittedWeights.is_error()) - { - return batchnormRefittedWeights.error(); - } } - successfullyRefittedWeights += batchnormRefittedWeights.value(); - return Status::success(); + successfullyRefittedWeights += batchnormRefittedWeights; } -Status ModelRefitter::refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node) +void ModelRefitter::refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node) { size_t thenGraphOutputSize{}; size_t elseGraphOutputSize{}; @@ -350,44 +335,40 @@ Status ModelRefitter::refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node) if (attr.name() == "then_branch") { ::ONNX_NAMESPACE::GraphProto const& thenGraph = static_cast<::ONNX_NAMESPACE::GraphProto const&>(attr.g()); - CHECK_STATUS(refitOnnxGraph(thenGraph)); + refitOnnxGraph(thenGraph); thenGraphOutputSize = thenGraph.output_size(); } else if (attr.name() == "else_branch") { ::ONNX_NAMESPACE::GraphProto const& elseGraph = static_cast<::ONNX_NAMESPACE::GraphProto const&>(attr.g()); - CHECK_STATUS(refitOnnxGraph(elseGraph)); + refitOnnxGraph(elseGraph); elseGraphOutputSize = elseGraph.output_size(); } } // Number of outputs are the same between the two branches. - ASSERT(thenGraphOutputSize == elseGraphOutputSize - && "then/else subgraphs within the IF node should have the same number of outputs", - ErrorCode::kREFIT_FAILED); - - return Status::success(); + ONNXTRT_CHECK(thenGraphOutputSize == elseGraphOutputSize, + MAKE_ERROR( + "then/else subgraphs within the IF node should have the same number of outputs", ErrorCode::kREFIT_FAILED)); } -Status ModelRefitter::refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node) +void ModelRefitter::refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node) { ::ONNX_NAMESPACE::GraphProto const& body = static_cast<::ONNX_NAMESPACE::GraphProto const&>(node.attribute(0).g()); - CHECK_STATUS(refitOnnxGraph(body)); - return Status::success(); + refitOnnxGraph(body); } -Status ModelRefitter::refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node) +void ModelRefitter::refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node) { for (auto const& attr : node.attribute()) { if (attr.name() == "body") { ::ONNX_NAMESPACE::GraphProto const& body = static_cast<::ONNX_NAMESPACE::GraphProto const&>(attr.g()); - CHECK_STATUS(refitOnnxGraph(body)); + refitOnnxGraph(body); break; } } - return Status::success(); } bool ModelRefitter::refitFromBytes( @@ -401,20 +382,10 @@ bool ModelRefitter::refitFromBytes( mWeightsContext.setOnnxFileLocation(modelPath); } - Status status = deserializeOnnxModel(serializedOnnxModel, serializedOnnxModelSize, &onnx_model); - if (status.is_error()) - { - mErrors.push_back(status); - return false; - } + deserializeOnnxModel(serializedOnnxModel, serializedOnnxModelSize, &onnx_model); refittableWeights = getRefittableWeights(); - status = refitOnnxWeights(onnx_model); - if (status.is_error()) - { - mErrors.push_back(status); - return false; - } + refitOnnxWeights(onnx_model); return true; } ONNXTRT_CATCH_LOG(mLogger) @@ -428,22 +399,11 @@ bool ModelRefitter::refitFromFile(char const* onnxModelFile) noexcept // Keep track of the absolute path to the ONNX file. 
mWeightsContext.setOnnxFileLocation(onnxModelFile); - Status status = deserializeOnnxModelFile(onnxModelFile, onnx_model); - if (status.is_error()) - { - mErrors.push_back(status); - return false; - } - + deserializeOnnxModelFile(onnxModelFile, onnx_model); refittableWeights = getRefittableWeights(); if (!refittableWeights.empty()) { - status = refitOnnxWeights(onnx_model); - if (status.is_error()) - { - mErrors.push_back(status); - return false; - } + refitOnnxWeights(onnx_model); } return true; } diff --git a/ModelRefitter.hpp b/ModelRefitter.hpp index d2a78ca..5b656f0 100644 --- a/ModelRefitter.hpp +++ b/ModelRefitter.hpp @@ -60,17 +60,17 @@ class ModelRefitter : public nvonnxparser::IParserRefitter //! TConvertFunc is a functor for converting ShapedWeights to an array of type T. //! It should return a T*. template - ValueOrStatus batchnormWeightRefitter( + size_t batchnormWeightRefitter( ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs, TConvertFunc&& f); - Status refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& onnx_model); - Status refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph); - Status refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph); - Status refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName); - Status refitOnnxBatchNormNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph); - Status refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node); - Status refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node); - Status refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node); + void refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& onnx_model); + void refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph); + void refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph); + void refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName); + void refitOnnxBatchNormNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph); + void refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node); + void refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node); + void refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node); public: ModelRefitter(nvinfer1::IRefitter* refitter, nvinfer1::ILogger* logger) diff --git a/OnnxAttrs.cpp b/OnnxAttrs.cpp index 60ac5b1..0733768 100644 --- a/OnnxAttrs.cpp +++ b/OnnxAttrs.cpp @@ -129,9 +129,9 @@ onnx2trt::ShapedWeights OnnxAttrs::get(std::string cons std::string extName = this->at(key)->ref_attr_name(); bool isExtAttr = isExternalAttribute(extName, mCtx); - ::ONNX_NAMESPACE::TensorProto const& onnx_weights_tensor = isExtAttr ? mCtx->localFunctionStack().back().second.at(extName)->t() : this->at(key)->t(); + ::ONNX_NAMESPACE::TensorProto const& onnxTensor = isExtAttr ? mCtx->localFunctionStack().back().second.at(extName)->t() : this->at(key)->t(); onnx2trt::ShapedWeights weights; - bool success = mCtx->getWeightsContext().convertOnnxWeights(onnx_weights_tensor, &weights); + bool success = mCtx->getWeightsContext().convertOnnxWeights(onnxTensor, &weights, true); if (!success) { throw std::runtime_error{"Unable to convert ONNX weights"}; diff --git a/README.md b/README.md index 6962df2..ed1474f 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia. 
## Supported TensorRT Versions
-Development on the this branch is for the latest version of [TensorRT 10.2](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on this branch is for the latest version of [TensorRT 10.4](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
For previous versions of TensorRT, refer to their respective branches.
@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
### Dependencies
- [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
- - [TensorRT 10.2](https://developer.nvidia.com/tensorrt)
- - [TensorRT 10.2 open source libaries] (https://github.com/NVIDIA/TensorRT/)
+ - [TensorRT 10.4](https://developer.nvidia.com/tensorrt)
+ - [TensorRT 10.4 open source libraries](https://github.com/NVIDIA/TensorRT/)
### Building
@@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options
Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.
-TensorRT 10.1 supports ONNX release 1.16.0. Install it with:
+TensorRT 10.4 supports ONNX release 1.16.0. Install it with:
    python3 -m pip install onnx==1.16.0
diff --git a/ShapeTensor.cpp b/ShapeTensor.cpp
index f177136..ffc6bc7 100644
--- a/ShapeTensor.cpp
+++ b/ShapeTensor.cpp
@@ -542,7 +542,7 @@ nvinfer1::ISliceLayer* addSlice(ImporterContext* ctx, nvinfer1::ITensor& data, c
constexpr int32_t minDim = std::numeric_limits<int32_t>::min();
constexpr int32_t maxDim = std::numeric_limits<int32_t>::max();
nvinfer1::ISliceLayer* slice = N_CHECK(ctx->network()->addSlice(data,
- shapeTensorToDims(starts, "slice start", 0, maxDim), shapeTensorToDims(sizes, "slice size", 0, maxDim),
+ shapeTensorToDims(starts, "slice start", minDim, maxDim), shapeTensorToDims(sizes, "slice size", 0, maxDim),
shapeTensorToDims(strides, "slide strides", minDim, maxDim)));
setShapeInputIfDynamic(ctx, slice, 1, starts);
setShapeInputIfDynamic(ctx, slice, 2, sizes);
diff --git a/Status.hpp b/Status.hpp
index 98c0909..2af35a0 100644
--- a/Status.hpp
+++ b/Status.hpp
@@ -203,6 +203,7 @@ static std::ostream& operator<<(std::ostream& stream, nvinfer1::DataType const&
case nvinfer1::DataType::kBOOL: return stream << "bool";
case nvinfer1::DataType::kFP8: return stream << "float8";
case nvinfer1::DataType::kINT4: return stream << "int4";
+ default: throw std::runtime_error("Unknown dtype");
}
}
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 700f447..d390b37 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -2,6 +2,14 @@
# ONNX-TensorRT Changelog
+# TensorRT 10.4 GA Release - 2024-9-5
+For more details, see the 10.4 GA release notes.
+
+- Added support for tensor `axes` for `Pad` operations
+- Added support for `BlackmanWindow`, `HammingWindow`, and `HannWindow` operations
+- Improved error handling in `IParserRefitter`
+- Fixed kernel shape inference in multi-input convolutions
+
# TensorRT 10.3 GA Release - 2024-8-7
For more details, see the 10.3 GA release notes.
@@ -14,13 +22,14 @@ For more details, see the 10.2 GA release notes.
- Improved error handling with new macros and classes
- Minor changes to op importers for `GRU` and `Squeeze`
-# TensorRT 10.1 GA Release - 2024-6-17
+# TensorRT 10.1 GA Release - 2024-6-10
For more details, see the 10.1 GA release notes.
- Added `supportsModelV2` API - Added support for `DeformConv` operation - Added support for `PluginV3` TensorRT Plugins - Marked all IParser and IParserRefitter APIs as `noexcept` +- Shape inputs can be passed to custom ops supported by `IPluginV3`-based plugins by indicating the input indices to be interpreted as shape inputs by a node attribute named `tensorrt_plugin_shape_input_indices`. # TensorRT 10.0 GA Release - 2024-4-25 For more details, see the 10.0 GA release notes. diff --git a/docs/operators.md b/docs/operators.md index 0a1bcfa..170fbb1 100644 --- a/docs/operators.md +++ b/docs/operators.md @@ -2,7 +2,7 @@ # Supported ONNX Operators -TensorRT 10.0 supports operators in the inclusive range of opset 9 to opset 20. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below. +TensorRT 10.4 supports operators in the inclusive range of opset 9 to opset 20. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below. TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, INT32, INT64, FP8, INT8, INT4, UINT8, and BOOL @@ -36,7 +36,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA | BitwiseNot | N | | BitwiseOr | N | | BitwiseXor | N | -| BlackmanWindow | N | +| BlackmanWindow | Y | | Cast | Y | FP32, FP16, BF16, INT32, INT64, UINT8, BOOL | | | CastLike | Y | FP32, FP16, BF16, INT32, INT64, UINT8, BOOL | | | Ceil | Y | FP32, FP16, BF16 | @@ -85,8 +85,8 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA | GridSample | Y | FP32, FP16 | Input must be 4D input. 
| GroupNormalization | Y | FP32, FP16, BF16 | | GRU | Y | FP32, FP16, BF16 | For bidirectional GRUs, activation functions must be the same for both the forward and reverse pass -| HammingWindow | N | -| HannWindow | N | +| HammingWindow | Y | +| HannWindow | Y | | HardSigmoid | Y | FP32, FP16, BF16 | | HardSwish | Y | FP32, FP16, BF16 | | Hardmax | Y | FP32, FP16, BF16 | `axis` dimension of input must be a build-time constant @@ -132,7 +132,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA | OptionalGetElement | N | | OptionalHasElement | N | | Or | Y | BOOL | -| Pad | Y | FP32, FP16, BF16, INT32, INT64 | `axes` must be an initializer | +| Pad | Y | FP32, FP16, BF16, INT32, INT64 | | ParametricSoftplus | Y | FP32, FP16, BF16 | | Pow | Y | FP32, FP16, BF16 | | PRelu | Y | FP32, FP16, BF16 | @@ -184,7 +184,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA | Sin | Y | FP32, FP16, BF16 | | Sinh | Y | FP32, FP16, BF16 | | Size | Y | FP32, FP16, BF16, INT32, INT64, BOOL | -| Slice | Y | FP32, FP16, BF16, INT32, INT64, BOOL | +| Slice | Y | FP32, FP16, BF16, INT32, INT64, BOOL | | Softmax | Y | FP32, FP16, BF16 | | SoftmaxCrossEntropyLoss | N | | Softplus | Y | FP32, FP16, BF16 | diff --git a/importerUtils.cpp b/importerUtils.cpp index c130889..1ec5b3e 100644 --- a/importerUtils.cpp +++ b/importerUtils.cpp @@ -3,7 +3,6 @@ */ #include "importerUtils.hpp" -#include "NvInferSafeRuntime.h" #include "OnnxAttrs.hpp" #include "bfloat16.hpp" #include @@ -877,15 +876,6 @@ nvinfer1::IPluginCreatorInterface* importPluginCreator(ImporterContext* ctx, std creator = pluginRegistry.getCreator(pluginName.c_str(), pluginVersion.c_str(), pluginNamespace.c_str()); #endif // ENABLE_STD_PLUGIN -#if ENABLE_SAFE_PLUGIN - auto safetyPluginRegistry = nvinfer1::getBuilderSafePluginRegistry(nvinfer1::EngineCapability::kSAFETY); - if (creator == nullptr && safetyPluginRegistry != nullptr) - { - creator = safetyPluginRegistry->getPluginCreator( - pluginName.c_str(), pluginVersion.c_str(), pluginNamespace.c_str()); - } -#endif // ENABLE_SAFE_PLUGIN - // Do not perform a N_CHECK here as a plugin not being found is a valid case. It is up to the callers to handle the // nullptr correctly. 
return creator; @@ -1207,8 +1197,8 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); } - auto scale_weights = inputs.at(1).weights(); - auto bias_weights = inputs.at(2).weights(); + auto scaleWeights = inputs.at(1).weights(); + auto biasWeights = inputs.at(2).weights(); OnnxAttrs attrs(node, ctx); float epsilon = attrs.get("epsilon", 1e-5F); int32_t const relu{0}; // the ONNX instance norm op does not use the relu parameter @@ -1220,12 +1210,12 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE std::vector f; // get the values of constant inputs and cast them to float32 - float const* scaleValues = ctx->getWeightsContext().getFP32Values(scale_weights); - float const* biasValues = ctx->getWeightsContext().getFP32Values(bias_weights); + float const* scaleValues = ctx->getWeightsContext().getFP32Values(scaleWeights); + float const* biasValues = ctx->getWeightsContext().getFP32Values(biasWeights); f.emplace_back("epsilon", &epsilon, nvinfer1::PluginFieldType::kFLOAT32, 1); - f.emplace_back("scales", scaleValues, nvinfer1::PluginFieldType::kFLOAT32, scale_weights.count()); - f.emplace_back("bias", biasValues, nvinfer1::PluginFieldType::kFLOAT32, bias_weights.count()); + f.emplace_back("scales", scaleValues, nvinfer1::PluginFieldType::kFLOAT32, scaleWeights.count()); + f.emplace_back("bias", biasValues, nvinfer1::PluginFieldType::kFLOAT32, biasWeights.count()); f.emplace_back("relu", &relu, nvinfer1::PluginFieldType::kINT32, 1); f.emplace_back("alpha", &alpha, nvinfer1::PluginFieldType::kFLOAT32, 1); @@ -1335,6 +1325,26 @@ NodeImportResult normalizationHelper(ImporterContext* ctx, const ::ONNX_NAMESPAC return {{output}}; } +Status normalizeAxes(ShapeTensor& axes, int32_t const rank) +{ + ASSERT(axes.allValuesKnown() && "Axes should not contain unknown values.", ErrorCode::kINTERNAL_ERROR); + std::vector newAxes; + newAxes.reserve(axes.size()); + for (int64_t axis : axes) + { + ASSERT((-rank <= axis && axis < rank) && "Axis must be in the range of [-rank, rank-1].", + ErrorCode::kINVALID_VALUE); + // "Negative value means counting dimensions from the back." 
+ if (axis < 0) + { + axis += rank; + } + newAxes.push_back(axis); + } + axes = ShapeTensor(1, std::move(newAxes)); + return Status::success(); +} + nvinfer1::Dims insertDimension(nvinfer1::Dims const& dims, int const axis, int const value) { if (axis >= nvinfer1::Dims::MAX_DIMS || dims.nbDims >= nvinfer1::Dims::MAX_DIMS) @@ -1743,22 +1753,27 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No std::vector& inputs) { ASSERT(inputs.size() >= 2 && "Convolution require at least 2 inputs.", ErrorCode::kUNSUPPORTED_NODE); - nvinfer1::ITensor* input_tensor_ptr = &convertToTensor(inputs.at(0), ctx); - nvinfer1::Dims const input_dims = input_tensor_ptr->getDimensions(); - - nvinfer1::Dims dims = input_dims; + nvinfer1::ITensor* input = &convertToTensor(inputs.at(0), ctx); + nvinfer1::Dims dims = input->getDimensions(); bool needToExpandDims = (dims.nbDims == 3); if (needToExpandDims) { // Expand spatial dims from 1D to 2D - const std::vector axes{3}; - input_tensor_ptr = unsqueezeTensor(ctx, node, *input_tensor_ptr, axes); - dims = input_tensor_ptr->getDimensions(); + std::vector const axes{3}; + input = unsqueezeTensor(ctx, node, *input, axes); + dims = input->getDimensions(); } auto const nbSpatialDims = dims.nbDims - 2; - nvinfer1::Dims filter_dim; - filter_dim.nbDims = nbSpatialDims; + nvinfer1::Dims kernelDims; + kernelDims.nbDims = nbSpatialDims; + + // Populate spatial dims from the shape of the convolution weights. + for (int32_t i = 1; i <= nbSpatialDims; ++i) + { + kernelDims.d[nbSpatialDims - i] = inputs.at(1).shape().d[inputs.at(1).shape().nbDims - i]; + } + nvinfer1::Dims strides = makeDims(nbSpatialDims, 1); nvinfer1::Dims begPadding = makeDims(nbSpatialDims, 0); nvinfer1::Dims endPadding = makeDims(nbSpatialDims, 0); @@ -1766,49 +1781,49 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No nvinfer1::PaddingMode paddingMode; bool excludePadding{false}; CHECK_STATUS(getKernelParams( - ctx, node, &filter_dim, &strides, &begPadding, &endPadding, paddingMode, excludePadding, &dilations)); + ctx, node, &kernelDims, &strides, &begPadding, &endPadding, paddingMode, excludePadding, &dilations)); auto const nChannel = dims.d[1]; auto const K = inputs.at(1).shape().d[0]; auto const C = inputs.at(1).shape().d[1]; - auto kernel_weights = ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT); - auto bias_weights = ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT); + auto kernelWeights = ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT); + auto biasWeights = ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT); - auto const checkSpatialDims = [&nbSpatialDims, &filter_dim](nvinfer1::Dims const& dims) { + auto const checkSpatialDims = [&nbSpatialDims, &kernelDims](nvinfer1::Dims const& dims) { // Check that the number of spatial dimensions and the kernel shape matches up. 
if (nbSpatialDims != dims.nbDims - 2) { return false; } - return std::equal(filter_dim.d, filter_dim.d + nbSpatialDims, dims.d + dims.nbDims - nbSpatialDims); + return std::equal(kernelDims.d, kernelDims.d + nbSpatialDims, dims.d + dims.nbDims - nbSpatialDims); }; - nvinfer1::ITensor* kernel_tensor_ptr{nullptr}; - nvinfer1::ITensor* bias_tensor_ptr{nullptr}; + nvinfer1::ITensor* kernelTensor{nullptr}; + nvinfer1::ITensor* biasTensor{nullptr}; if (inputs.at(1).is_tensor()) { - kernel_tensor_ptr = &convertToTensor(inputs.at(1), ctx); + kernelTensor = &convertToTensor(inputs.at(1), ctx); if (needToExpandDims) { // Expand spatial dims from 1D to 2D std::vector const axes{3}; - kernel_tensor_ptr = unsqueezeTensor(ctx, node, *kernel_tensor_ptr, axes); - ASSERT(kernel_tensor_ptr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + kernelTensor = unsqueezeTensor(ctx, node, *kernelTensor, axes); + ASSERT(kernelTensor && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); } - ASSERT(checkSpatialDims(kernel_tensor_ptr->getDimensions()) + ASSERT(checkSpatialDims(kernelTensor->getDimensions()) && "The input tensor shape misaligns with the input kernel shape.", ErrorCode::kUNSUPPORTED_NODE); } else { - kernel_weights = inputs.at(1).weights(); + kernelWeights = inputs.at(1).weights(); if (needToExpandDims) { - kernel_weights.shape.nbDims = 4; - kernel_weights.shape.d[3] = 1; + kernelWeights.shape.nbDims = 4; + kernelWeights.shape.d[3] = 1; } - ASSERT_NODE(checkSpatialDims(kernel_weights.shape), + ASSERT_NODE(checkSpatialDims(kernelWeights.shape), "The input tensor shape misaligns with the input kernel shape.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } @@ -1817,11 +1832,11 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No { if (inputs.at(2).is_weights()) { - bias_weights = inputs.at(2).weights(); + biasWeights = inputs.at(2).weights(); } else { - bias_tensor_ptr = &convertToTensor(inputs.at(2), ctx); + biasTensor = &convertToTensor(inputs.at(2), ctx); } } @@ -1832,7 +1847,7 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No ErrorCode::kINVALID_NODE); nvinfer1::IConvolutionLayer* layer - = N_CHECK(ctx->network()->addConvolutionNd(*input_tensor_ptr, K, filter_dim, kernel_weights, bias_weights)); + = N_CHECK(ctx->network()->addConvolutionNd(*input, K, kernelDims, kernelWeights, biasWeights)); ASSERT_NODE(layer, "Failed to add the Convolution layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); layer->setStrideNd(strides); layer->setPaddingMode(paddingMode); @@ -1842,13 +1857,13 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No layer->setNbGroups(ngroup); // Set dynamic weights - if (kernel_tensor_ptr) + if (kernelTensor) { - layer->setInput(1, *kernel_tensor_ptr); + layer->setInput(1, *kernelTensor); } - if (bias_tensor_ptr) + if (biasTensor) { - layer->setInput(2, *bias_tensor_ptr); + layer->setInput(2, *biasTensor); } ctx->registerLayer(layer, node); @@ -2332,4 +2347,44 @@ nvinfer1::IEinsumLayer* parseGraphWithMoreInputs(ImporterContext* ctx, ::ONNX_NA return einsumLayer; } +nvinfer1::ITensor* generateWindow(ImporterContext* ctx, nvinfer1::ITensor* N) +{ + auto shapeOfN = ShapeTensor(*N, 0); + nvinfer1::IFillLayer* layer = N_CHECK(addFill(ctx, convertTo1D(ctx, shapeOfN), nvinfer1::FillOperation::kLINSPACE)); + layer->setAlpha(0.0F); + layer->setBeta(1.0F); + auto* fillOutput = N_CHECK(layer->getOutput(0)); + return fillOutput; +} + +nvinfer1::ITensor* 
windowHelper(ImporterContext* ctx, float numerator, nvinfer1::ITensor* n, nvinfer1::ITensor* N,
+ nvinfer1::UnaryOperation op, int32_t periodic)
+{
+ auto* numeratorTensor = N_CHECK(addConstantScalar(ctx, numerator, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT,
+ nvinfer1::Dims{1, {1}})->getOutput(0));
+ auto numeratorLayer
+ = N_CHECK(ctx->network()->addElementWise(*numeratorTensor, *n, nvinfer1::ElementWiseOperation::kPROD));
+ auto numeratorOutput = N_CHECK(numeratorLayer->getOutput(0));
+
+ // If periodic is 0, subtract 1 from the denominator (N)
+ if (periodic == 0)
+ {
+ auto* one = N_CHECK(addConstantScalar(ctx, 1, ::ONNX_NAMESPACE::TensorProto_DataType_INT32)->getOutput(0));
+ one = castHelper(ctx, one, N->getType());
+ auto minusOne = N_CHECK(ctx->network()->addElementWise(*N, *one, nvinfer1::ElementWiseOperation::kSUB));
+ N = N_CHECK(minusOne->getOutput(0));
+ }
+
+ auto NFloat = N_CHECK(castHelper(ctx, N, nvinfer1::DataType::kFLOAT));
+ broadcastTensors(ctx, n, NFloat);
+ auto divLayer
+ = N_CHECK(ctx->network()->addElementWise(*numeratorOutput, *NFloat, nvinfer1::ElementWiseOperation::kDIV));
+ auto divOutput = N_CHECK(divLayer->getOutput(0));
+
+ auto trigLayer = N_CHECK(ctx->network()->addUnary(*divOutput, op));
+ auto trigOutput = N_CHECK(trigLayer->getOutput(0));
+
+ return N_CHECK(trigOutput);
+}
+
} // namespace onnx2trt
diff --git a/importerUtils.hpp b/importerUtils.hpp
index 94699ac..73abe9c 100644
--- a/importerUtils.hpp
+++ b/importerUtils.hpp
@@ -259,6 +259,10 @@ nvinfer1::Dims makeDims(int nbDims, int val);
NodeImportResult normalizationHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, std::vector& inputs);
+// Given a list of axes in the range of [-rank, rank-1], where rank is the rank
+// of the corresponding data tensor, normalize to [0, rank-1].
+Status normalizeAxes(ShapeTensor& axes, int32_t const rank);
+
// Helper function to parse activation values for LSTM nodes
std::vector parseLSTMActivationValues(std::vector const& activationTypes, std::vector const& activationValues, bool isAlpha);
@@ -430,8 +434,16 @@ Status processEllipsisAndImplicitOutput(
nvinfer1::IEinsumLayer* parseGraphWithMoreInputs(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector const& inputs, int64_t const nbInputs, std::string equation);
+// Helper function to convert TensorRT datatype enum into a human-readable string.
std::string getTrtDtypeName(nvinfer1::DataType TrtDtype);
+// Helper function to generate a Window tensor for Window operations (HannWindow, HammingWindow, BlackmanWindow).
+nvinfer1::ITensor* generateWindow(ImporterContext* ctx, nvinfer1::ITensor* N);
+
+// Helper function to handle Window generation ops. Calculates TrigOp(numerator*n / N) and returns the output tensor.
+nvinfer1::ITensor* windowHelper(ImporterContext* ctx, float numerator, nvinfer1::ITensor* n, nvinfer1::ITensor* N,
+ nvinfer1::UnaryOperation op, int32_t periodic);
+
//! Describes occurrence of a named dimension.
class NamedDimension { diff --git a/onnxOpCheckers.cpp b/onnxOpCheckers.cpp index f3c7f9f..103df3a 100644 --- a/onnxOpCheckers.cpp +++ b/onnxOpCheckers.cpp @@ -17,7 +17,6 @@ #include "importerUtils.hpp" #include -#include #include #include #include @@ -175,6 +174,8 @@ DEFINE_OP_CHECKER(BatchNormalization) ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); } +DEFINE_OP_EMPTY_CHECKER(BlackmanWindow) + DEFINE_OP_CHECKER(Cast) { OnnxAttrs attrs(node, ctx); @@ -359,6 +360,10 @@ DEFINE_OP_CHECKER(GRU) } } +DEFINE_OP_EMPTY_CHECKER(HammingWindow) + +DEFINE_OP_EMPTY_CHECKER(HannWindow) + DEFINE_OP_EMPTY_CHECKER(Hardmax) DEFINE_OP_EMPTY_CHECKER(HardSigmoid) @@ -1046,11 +1051,6 @@ DEFINE_OP_CHECKER(Bernoulli) STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); } -DEFINE_OP_CHECKER(BlackmanWindow) -{ - STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); -} - DEFINE_OP_CHECKER(CenterCropPad) { STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); @@ -1061,16 +1061,6 @@ DEFINE_OP_CHECKER(DynamicQuantizeLinear) STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); } -DEFINE_OP_CHECKER(HammingWindow) -{ - STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); -} - -DEFINE_OP_CHECKER(HannWindow) -{ - STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); -} - DEFINE_OP_CHECKER(NegativeLogLikelihoodLoss) { STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); diff --git a/onnxOpImporters.cpp b/onnxOpImporters.cpp index fa37ec7..45d19f1 100644 --- a/onnxOpImporters.cpp +++ b/onnxOpImporters.cpp @@ -2,7 +2,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include "onnxOpImporters.hpp" +#if defined(_MSC_VER) +#define _USE_MATH_DEFINES +#endif +#include + #include "ConditionalHelpers.hpp" #include "LoopHelpers.hpp" #include "ModelImporter.hpp" @@ -15,10 +19,10 @@ #include "bfloat16.hpp" #include "half.h" #include "importerUtils.hpp" +#include "onnxOpImporters.hpp" #include // For std::min, std::max #include -#include #include // For std::memcpy, std::memset #include #include @@ -382,6 +386,63 @@ DEFINE_BUILTIN_OP_IMPORTER(BatchNormalization) ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT), combinedBias.getName(), combinedScale.getName()); } +DEFINE_BUILTIN_OP_IMPORTER(BlackmanWindow) +{ + + /*** + + Operation returns a window vector, where + + Y[n] = 0.42 - 0.5cos(2pi*n / N) + 0.08cos(4pi*n / N) + + Where N is the window length, and n is each element in the window. + + Note that if `periodic == 0`, the denominator becomes N - 1. + + This can be represented by creating a range 'n' from 0 -> N, and performing the operations elementwise. 
+ + ***/ + + OnnxAttrs attrs(node, ctx); + int32_t outputDtype = attrs.get("output_datatype", 1); + int32_t periodic = attrs.get("periodic", 1); + ASSERT_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE); + + constexpr float alpha = 0.42F; + constexpr float beta = 0.5F; + constexpr float gamma = 0.08F; + + auto* N = &convertToTensor(inputs.at(0), ctx); + ASSERT_NODE( + N->getDimensions().nbDims == 0, "Window length must be a scalar!", node, nodeIdx, ErrorCode::kINVALID_NODE); + auto* window = generateWindow(ctx, N); + + auto lhsCosOutput = windowHelper(ctx, 2.F * M_PI, window, N, nvinfer1::UnaryOperation::kCOS, periodic); + + auto betaTensor = N_CHECK(addConstantScalar(ctx, beta, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, + nvinfer1::Dims{1, {1}})->getOutput(0)); + auto betaLayer + = N_CHECK(ctx->network()->addElementWise(*betaTensor, *lhsCosOutput, nvinfer1::ElementWiseOperation::kPROD)); + auto betaOutput = N_CHECK(betaLayer->getOutput(0)); + + auto rhsCosOutput = windowHelper(ctx, 4.F * M_PI, window, N, nvinfer1::UnaryOperation::kCOS, periodic); + auto gammaTensor = N_CHECK(addConstantScalar(ctx, gamma, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, + nvinfer1::Dims{1, {1}})->getOutput(0)); + auto gammaLayer + = N_CHECK(ctx->network()->addElementWise(*gammaTensor, *rhsCosOutput, nvinfer1::ElementWiseOperation::kPROD)); + auto gammaOutput = N_CHECK(gammaLayer->getOutput(0)); + + auto alphaTensor = N_CHECK(addConstantScalar(ctx, alpha, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, + nvinfer1::Dims{1, {1}})->getOutput(0)); + auto alphaMinusBeta + = N_CHECK(ctx->network()->addElementWise(*alphaTensor, *betaOutput, nvinfer1::ElementWiseOperation::kSUB)); + auto alphaMinusBetaTensor = N_CHECK(alphaMinusBeta->getOutput(0)); + + auto plusGamma = N_CHECK( + ctx->network()->addElementWise(*alphaMinusBetaTensor, *gammaOutput, nvinfer1::ElementWiseOperation::kSUM)); + RETURN_FIRST_OUTPUT(plusGamma, node, nodeIdx); +} + DEFINE_BUILTIN_OP_IMPORTER(Cast) { // Get input node. @@ -1652,6 +1713,9 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE layer->setInput(2, *zeroPointInput); } + // Register the Q/DQ layer. + ctx->registerLayer(layer, node); + // Return layer output RETURN_FIRST_OUTPUT(layer, node, nodeIdx); } @@ -2492,9 +2556,11 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) // H(t) = (1 - z(t)) . h(t) + (z(t) . H(t-1)) // Constant `1` needs to be the same type as the inputs, either FP16 or FP32. - auto onnxType = zt->getType() == nvinfer1::DataType::kHALF ? ::ONNX_NAMESPACE::TensorProto::FLOAT16 - : ::ONNX_NAMESPACE::TensorProto::FLOAT; - auto* constOne = N_CHECK(addConstantScalar(ctx, 1.f, onnxType, Dims3{1, 1, 1})->getOutput(0)); + auto* constOne = zt->getType() == nvinfer1::DataType::kHALF + ? N_CHECK(addConstantScalar( + ctx, static_cast(1), ::ONNX_NAMESPACE::TensorProto::FLOAT16, Dims3{1, 1, 1}) + ->getOutput(0)) + : N_CHECK(addConstantScalar(ctx, 1.f, ::ONNX_NAMESPACE::TensorProto::FLOAT, Dims3{1, 1, 1})->getOutput(0)); nvinfer1::ITensor* Ht = getElementWiseResult(ctx, *getElementWiseResult(ctx, *getElementWiseResult(ctx, *constOne, *zt, eOp::kSUB), *ht, eOp::kPROD), *getElementWiseResult(ctx, *zt, *Ht1Output, eOp::kPROD), eOp::kSUM); @@ -2526,6 +2592,90 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) return {{outputs}}; } +DEFINE_BUILTIN_OP_IMPORTER(HammingWindow) +{ + /*** + + Operation returns a window vector, where: + + Y[n] = alpha - beta * cos(2*pi*n / N) + + Where N is the window length, and n is each element in the window. 
+ + Note that if `periodic == 0`, the denominator becomes N - 1. + + This can be represented by creating a range 'n' from 0 -> N, and performing the operations elementwise. + + Note that in the ONNX op definition alpha and beta are not provided. We will use the default values defined in ONNX: + + alpha = 25/46 + beta = 1 - alpha + + ***/ + + OnnxAttrs attrs(node, ctx); + int32_t outputDtype = attrs.get("output_datatype", 1); + int32_t periodic = attrs.get("periodic", 1); + ASSERT_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE); + + constexpr float alpha = 25.F / 46.F; + constexpr float beta = 1.F - alpha; + + auto* N = &convertToTensor(inputs.at(0), ctx); + ASSERT_NODE( + N->getDimensions().nbDims == 0, "Window length must be a scalar!", node, nodeIdx, ErrorCode::kINVALID_NODE); + auto* window = generateWindow(ctx, N); + + auto* cosOutput = windowHelper(ctx, 2.F * M_PI, window, N, nvinfer1::UnaryOperation::kCOS, periodic); + + auto betaTensor = N_CHECK(addConstantScalar(ctx, beta, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, + nvinfer1::Dims{1, {1}})->getOutput(0)); + auto betaLayer + = N_CHECK(ctx->network()->addElementWise(*betaTensor, *cosOutput, nvinfer1::ElementWiseOperation::kPROD)); + auto betaOutput = N_CHECK(betaLayer->getOutput(0)); + + auto alphaTensor = N_CHECK(addConstantScalar(ctx, alpha, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, + nvinfer1::Dims{1, {1}})->getOutput(0)); + auto alphaLayer + = N_CHECK(ctx->network()->addElementWise(*alphaTensor, *betaOutput, nvinfer1::ElementWiseOperation::kSUB)); + + RETURN_FIRST_OUTPUT(alphaLayer, node, nodeIdx); +} + +DEFINE_BUILTIN_OP_IMPORTER(HannWindow) +{ + /*** + + Operation returns a window vector, where: + + Y[n] = sin^2(pi*n / N) + + Where N is the window length, and n is each element in the window. + + Note that if `periodic == 0`, the denominator becomes N - 1. + + This can be represented by creating a range 'n' from 0 -> N, and performing the operations elementwise. + + ***/ + + OnnxAttrs attrs(node, ctx); + int32_t outputDtype = attrs.get("output_datatype", 1); + int32_t periodic = attrs.get("periodic", 1); + ASSERT_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE); + + auto* N = &convertToTensor(inputs.at(0), ctx); + ASSERT_NODE( + N->getDimensions().nbDims == 0, "Window length must be a scalar!", node, nodeIdx, ErrorCode::kINVALID_NODE); + auto* window = generateWindow(ctx, N); + + auto sinOutput = windowHelper(ctx, M_PI, window, N, nvinfer1::UnaryOperation::kSIN, periodic); + + auto sinSquaredLayer + = N_CHECK(ctx->network()->addElementWise(*sinOutput, *sinOutput, nvinfer1::ElementWiseOperation::kPROD)); + + RETURN_FIRST_OUTPUT(sinSquaredLayer, node, nodeIdx); +} + DEFINE_BUILTIN_OP_IMPORTER(Hardmax) { CHECK_STATUS(notInvalidType(inputs.at(0), {"INT64", "INT32", "INT8", "UINT8", "BOOL"}, node, nodeIdx)); @@ -3795,7 +3945,6 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) float value{0.F}; nvinfer1::ITensor* valuePtr = nullptr; std::vector onnxPadding; - std::vector padAxes; if (ctx->getOpsetVersion() < 11) { @@ -3847,106 +3996,15 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) valuePtr = &convertToTensor(inputs.at(2), ctx); } } - // Opset 16 optional `axes` input. - if (inputs.size() == 4 && !inputs.at(3).isNullTensor()) - { - // Currently, `axes` input is supported only as an initializer. - if (inputs.at(3).is_weights()) - { - // `axes` is an initializer input. - CHECK_STATUS(weightsToVector(inputs.at(3).weights(), &padAxes)); - // Sanity check. 
- ASSERT_NODE(std::unordered_set(padAxes.begin(), padAxes.end()).size() == padAxes.size(), - "The input axes must have unique elements.", node, nodeIdx, ErrorCode::kINVALID_NODE); - // Accepted range of axis is [-r, r-1] where r = rank(data). - for (int32_t& axis : padAxes) - { - CHECK_STATUS(convertAxis(axis, nbDims, node, nodeIdx)); - } - } - else - { - // `axes` is a non-null tensor input. - ASSERT_NODE(false, "TensorRT does not support dynamic axes for pad!", node, nodeIdx, - ErrorCode::kUNSUPPORTED_NODE_INPUT); - } - } } - nvinfer1::ITensor* start{}; - nvinfer1::ITensor* size{}; - if (onnxPadding.empty()) - { - // `pads` is from activation instead of initializer or attributes. - nvinfer1::ITensor* onnxPaddingPtr = &convertToTensor(inputs.at(1), ctx); - ASSERT_NODE((onnxPaddingPtr->getDimensions().nbDims == 1), - "The padding input must be 1D. The rank of padding input = " << onnxPaddingPtr->getDimensions().nbDims - << ".", - node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - - // If `axes` is a non-empty input, onnxPaddingPtr needs to be updated with information from `axes`. - // Currently, `axes` is supported only if it's an initializer input. - if (!padAxes.empty()) - { - ASSERT_NODE(static_cast(onnxPaddingPtr->getDimensions().d[0]) == padAxes.size() * 2, - "pads should be twice the length of input axes i.e. " - << 2 * padAxes.size() << ", actual length is: " << onnxPaddingPtr->getDimensions().d[0], - node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + auto padAxes + = inputs.size() == 4 && !inputs.at(3).isNullTensor() ? ShapeTensor(ctx, inputs.at(3)) : iotaShapeVector(nbDims); - // onnxPaddingPtr is of the format [x1_begin, x2_begin, ..., x1_end, x2_end,...]. - ShapeTensor const paddingLen = gather(ctx, shapeOf(*onnxPaddingPtr), shapeVector(0)); - ShapeTensor const halfPaddingLen = floorDiv(ctx, paddingLen, shapeVector(2)); - // Obtain begins [x1_begin, x2_begin, ...,]. - nvinfer1::ISliceLayer* beginSliceLayer - = addSlice(ctx, *onnxPaddingPtr, shapeVector(0), halfPaddingLen, shapeVector(1)); - ctx->registerLayer(beginSliceLayer, node); - nvinfer1::ITensor* beginPads = beginSliceLayer->getOutput(0); - // Obtain ends [x1_end, x2_end, ...]. - nvinfer1::ISliceLayer* endSliceLayer - = addSlice(ctx, *onnxPaddingPtr, halfPaddingLen, halfPaddingLen, shapeVector(1)); - ctx->registerLayer(endSliceLayer, node); - nvinfer1::ITensor* endPads = endSliceLayer->getOutput(0); - - // Map axes to corresponding begins & ends and create ordered begins & ends. - std::vector padAxesLongInt(padAxes.begin(), padAxes.end()); - ShapeTensor const subscripts{axesToInterlaceSubscripts(ShapeTensor(1, std::move(padAxesLongInt)), nbDims)}; - ShapeTensor const orderedBeginPads - = interlace(ctx, similar(ctx, tensorDims, 0), ShapeTensor(*beginPads), subscripts); - ShapeTensor const orderedEndPads - = interlace(ctx, similar(ctx, tensorDims, 0), ShapeTensor(*endPads), subscripts); - - // Concatenate ordered begins & ends along zeroth dimension. 
- std::vector tensors{&orderedBeginPads.tensor(ctx), &orderedEndPads.tensor(ctx)}; - auto* concatLayer = N_CHECK(ctx->network()->addConcatenation(tensors.data(), tensors.size())); - ctx->registerLayer(concatLayer, node); - ASSERT_NODE(concatLayer, "Failed to register layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - concatLayer->setAxis(0); - onnxPaddingPtr = N_CHECK(concatLayer->getOutput(0)); - } - - ASSERT_NODE(onnxPaddingPtr->getDimensions().d[0] == nbDims * 2, - "pads should be a 1D tensor of shape " << 2 * nbDims - << ", actual shape is: " << onnxPaddingPtr->getDimensions().d[0], - node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - - auto pre = ctx->network() - ->addSlice( - *onnxPaddingPtr, nvinfer1::Dims{1, {0}}, nvinfer1::Dims{1, {nbDims}}, nvinfer1::Dims{1, {1}}) - ->getOutput(0); - auto post = ctx->network() - ->addSlice(*onnxPaddingPtr, nvinfer1::Dims{1, {nbDims}}, nvinfer1::Dims{1, {nbDims}}, - nvinfer1::Dims{1, {1}}) - ->getOutput(0); - - std::vector const zerosVal(nbDims, 0); - auto const zeros = addConstant(ctx, zerosVal, ::ONNX_NAMESPACE::TensorProto::INT64, - nvinfer1::Dims{ - 1, {nbDims}})->getOutput(0); - start = getElementWiseResult(ctx, *zeros, *pre, nvinfer1::ElementWiseOperation::kSUB); - auto const totalPadding = getElementWiseResult(ctx, *pre, *post, nvinfer1::ElementWiseOperation::kSUM); - size = getElementWiseResult(ctx, shapeOf(*tensorPtr).tensor(ctx), *totalPadding, nvinfer1::ElementWiseOperation::kSUM); - } - else + ShapeTensor beginPads; + ShapeTensor endPads; + int32_t const padAxesSize = padAxes.size(); + if (!onnxPadding.empty() && padAxes.allValuesKnown()) { // The pads is from initializer or attributes. // Passthrough path for no-op padding. @@ -3955,45 +4013,64 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) LOG_VERBOSE("Found no-op pad in node: " + getNodeName(node)); RETURN_IDENTITY(inputs.at(0), node, nodeIdx); } - // If padAxes is non-empty, update onnxPadding combining information from padAxes. - if (!padAxes.empty()) - { - // Sanity check. - ASSERT_NODE(onnxPadding.size() == padAxes.size() * 2, - "Length of pads input must be twice the length of axes input.", node, nodeIdx, - ErrorCode::kINVALID_NODE); - - // Map axes to onnxPadding and build a temporary vector combining the information held by onnxPadding & - // padAxes. It is: a) of length 2 * rank(input) b) ordered by axis c) of the format [x1_begin, x2_begin, - // ..., x1_end, x2_end,...] - std::vector tempOnnxPadding(2 * nbDims, 0); - for (size_t idx = 0; idx < padAxes.size(); idx++) - { - int32_t const currAxis = padAxes[idx]; - tempOnnxPadding[currAxis] = onnxPadding[idx]; // x_begin. - tempOnnxPadding[nbDims + currAxis] = onnxPadding[padAxes.size() + idx]; // x_end. - } + // Sanity check. + ASSERT_NODE(static_cast(onnxPadding.size()) == padAxesSize * 2, + "Length of pads input must be twice the length of axes input.", node, nodeIdx, ErrorCode::kINVALID_NODE); - // Update onnxPadding to hold the combined information. 
- onnxPadding = std::move(tempOnnxPadding); - } - nvinfer1::ITensor* totalPadding = nullptr; - ASSERT_NODE(convertOnnxPadding(ctx, nbDims, onnxPadding, start, totalPadding), "Failed to convert padding!", + std::vector beginPadsVec(onnxPadding.begin(), onnxPadding.begin() + padAxesSize); + std::vector endPadsVec(onnxPadding.begin() + padAxesSize, onnxPadding.end()); + beginPads = ShapeTensor(1, std::move(beginPadsVec)); + endPads = ShapeTensor(1, std::move(endPadsVec)); + } + else + { + nvinfer1::ITensor* onnxPaddingPtr = &convertToTensor(inputs.at(1), ctx); + ASSERT_NODE((onnxPaddingPtr->getDimensions().nbDims == 1), + "The padding input must be 1D. The rank of padding input = " << onnxPaddingPtr->getDimensions().nbDims + << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - size - = ctx->network() - ->addElementWise(shapeOf(*tensorPtr).tensor(ctx), *totalPadding, nvinfer1::ElementWiseOperation::kSUM) - ->getOutput(0); + ASSERT_NODE(onnxPaddingPtr->getDimensions().d[0] == padAxesSize * 2, + "pads should be twice the length of input axes i.e. " + << 2 * padAxesSize << ", actual length is: " << onnxPaddingPtr->getDimensions().d[0], + node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + + // onnxPaddingPtr is of the format [x1_begin, x2_begin, ..., x1_end, x2_end,...]. + ShapeTensor const paddingLen = gather(ctx, shapeOf(*onnxPaddingPtr), shapeVector(0)); + ShapeTensor const halfPaddingLen = floorDiv(ctx, paddingLen, shapeVector(2)); + // Obtain begins [x1_begin, x2_begin, ...,]. + nvinfer1::ISliceLayer* beginSliceLayer + = addSlice(ctx, *onnxPaddingPtr, shapeVector(0), halfPaddingLen, shapeVector(1)); + ctx->registerLayer(beginSliceLayer, node); + beginPads = ShapeTensor{*(beginSliceLayer->getOutput(0))}; + // Obtain ends [x1_end, x2_end, ...]. + nvinfer1::ISliceLayer* endSliceLayer + = addSlice(ctx, *onnxPaddingPtr, halfPaddingLen, halfPaddingLen, shapeVector(1)); + ctx->registerLayer(endSliceLayer, node); + endPads = ShapeTensor{*(endSliceLayer->getOutput(0))}; } - // add slice node - auto const stride = makeDims(nbDims, 1); - auto const& dummy = stride; - auto* layer = N_CHECK(ctx->network()->addSlice(*tensorPtr, dummy, dummy, stride)); + if (padAxes.allValuesKnown()) + { + // gather() requires indices to be normalized if their values are known + CHECK_STATUS(normalizeAxes(padAxes, nbDims)); + } + auto axesDims = gather(ctx, tensorDims, padAxes); + ShapeTensor const zeros = similar(ctx, beginPads, 0); + ShapeTensor start = sub(ctx, zeros, beginPads); + ShapeTensor size = add(ctx, axesDims, add(ctx, beginPads, endPads)); + ShapeTensor const stride = similar(ctx, start, 1); + + auto* layer = N_CHECK(addSlice(ctx, *tensorPtr, start, size, stride)); ASSERT_NODE(layer, "Could not create padding layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - layer->setInput(1, *start); - layer->setInput(2, *size); + if (padAxes.allValuesKnown()) + { + layer->setAxes(shapeTensorToDims(padAxes, "slice axes", -nbDims, nbDims - 1)); + } + else + { + layer->setInput(5, convertToTensor(inputs.at(3), ctx)); + } if (mode == "constant") { layer->setMode(nvinfer1::SampleMode::kFILL); @@ -5348,24 +5425,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Slice) if (axes.allValuesKnown()) { // gather() requires indices to be normalized if their values are known - std::vector newAxes; - newAxes.reserve(axes.size()); - for (int64_t axis : axes) - { - // "Accepted range is [-r, r-1] where r = rank(data)." 
- int32_t const r = dims.size(); - ASSERT_NODE((-r <= axis && axis < r), - "The range of axis must be in [-r, r-1], where r is the rank of input data. Provided axis = " - << axis << ", r = " << r << ".", - node, nodeIdx, ErrorCode::kINVALID_VALUE); - // "Negative value means counting dimensions from the back." - if (axis < 0) - { - axis += r; - } - newAxes.push_back(axis); - } - axes = ShapeTensor(1, std::move(newAxes)); + CHECK_STATUS(normalizeAxes(axes, dims.size())); } // Get dimensions of dims that correspond to axes for the computation of sizes auto const axesDims = gather(ctx, dims, axes); diff --git a/onnx_tensorrt/__init__.py b/onnx_tensorrt/__init__.py index 45acd8d..afef8f2 100644 --- a/onnx_tensorrt/__init__.py +++ b/onnx_tensorrt/__init__.py @@ -4,4 +4,4 @@ from . import backend -__version__ = "10.3.0" +__version__ = "10.4.0"
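Note for reviewers (not part of the diff): the new window importers compose an IFillLayer (kLINSPACE) range with elementwise and unary layers to evaluate the formulas quoted in their comments. For reference only, a minimal host-side C++ sketch of the same math, assuming float32 output and the default ONNX attributes; the helper below and its name are illustrative and do not appear in this change.

#include <cmath>
#include <cstdint>
#include <vector>

// Reference-only sketch: evaluates a0 - a1*cos(2*pi*n/denom) + a2*cos(4*pi*n/denom),
// where denom is N for periodic windows and N - 1 otherwise (mirroring windowHelper()).
static std::vector<float> makeWindow(int64_t N, bool periodic, float a0, float a1, float a2)
{
    float const pi = 3.14159265358979F;
    float const denom = periodic ? static_cast<float>(N) : static_cast<float>(N - 1);
    std::vector<float> y(static_cast<size_t>(N));
    for (int64_t n = 0; n < N; ++n)
    {
        float const x = static_cast<float>(n) / denom;
        y[static_cast<size_t>(n)] = a0 - a1 * std::cos(2.0F * pi * x) + a2 * std::cos(4.0F * pi * x);
    }
    return y;
}

With (a0, a1, a2) = (0.42F, 0.5F, 0.08F) this matches BlackmanWindow; (25.F/46.F, 1.F - 25.F/46.F, 0.F) matches HammingWindow; and (0.5F, 0.5F, 0.F) matches HannWindow, since sin^2(pi*x) == 0.5 - 0.5*cos(2*pi*x).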