From 6872a9473391a73b96741711d52b98c2c3e25146 Mon Sep 17 00:00:00 2001 From: Kevin Chen Date: Fri, 10 Mar 2023 08:08:06 -0800 Subject: [PATCH] ONNX-TensorRT 8.6-EA release Signed-off-by: Kevin Chen --- CMakeLists.txt | 4 +- ConditionalHelpers.cpp | 39 ++- ConditionalHelpers.hpp | 15 +- ImporterContext.cpp | 154 +++++++++- ImporterContext.hpp | 59 +++- ModelImporter.cpp | 133 ++++++--- ModelImporter.hpp | 51 +++- NvOnnxParser.h | 357 +++++++++++++++-------- README.md | 8 +- TensorOrWeights.hpp | 18 +- builtin_op_importers.cpp | 602 ++++++++++++++++++--------------------- docs/Changelog.md | 17 ++ docs/operators.md | 21 +- onnx2trt.hpp | 26 +- onnx2trt_utils.cpp | 348 +++++++++++++++------- onnx2trt_utils.hpp | 14 +- trt_utils.hpp | 7 +- 17 files changed, 1223 insertions(+), 650 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 23f9ea73..21abe1c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,8 +28,8 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}") # Version information #-------------------------------------------------- set(ONNX2TRT_MAJOR 8) -set(ONNX2TRT_MINOR 5) -set(ONNX2TRT_PATCH 1) +set(ONNX2TRT_MINOR 6) +set(ONNX2TRT_PATCH 0) set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version") #-------------------------------------------------- diff --git a/ConditionalHelpers.cpp b/ConditionalHelpers.cpp index 8a01f0a4..1b222c91 100644 --- a/ConditionalHelpers.cpp +++ b/ConditionalHelpers.cpp @@ -15,7 +15,7 @@ using LayerName = std::string; using InputIndex = int32_t; // A SubgraphPortsMap maps either the inputs or outputs ports of each node in an ONNX graph. -using SubgraphPortsMap = std::unordered_map>; +using SubgraphPortsMap = std::unordered_map>; // An InputsMap tracks which IIfConditionalInputLayer we've added to a layer's inputs, // so that we can reuse them if needed. @@ -27,7 +27,7 @@ using InputsMap = std::unordered_mapgetName(); return layerName.compare(0, key.size(), key) == 0; }); } @@ -57,7 +57,7 @@ Status addConditionalInputLayer(IImporterContext* ctx, nvinfer1::IIfConditional* inputLayer = conditional->addInput(*input); inputsMap[name] = inputLayer; const std::string inputLayerName(name); - ctx->registerLayer(inputLayer, inputLayerName + "_InputLayer"); + ctx->registerLayer(inputLayer, inputLayerName + "_InputLayer", nullptr); // Note: Since multiple conditionals may use the same external tensor, check unique names for output tensors of // IfConditionalInputLayers to avoid tensor name duplication. ctx->registerTensor( @@ -132,9 +132,8 @@ Status addIfInputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* conditi { // Find all of the tensors entering the subgraph. // The node-names are from the ONNX context. - using NodeName = std::string; using InputIndex = int32_t; - std::unordered_map> subgraphInputsMap; + std::unordered_map> subgraphInputsMap; getSubgraphInputs(newLayers, subgraphInputsMap); // Add a ConditionalInputLayer in front of each input that is external to the subgraph. 
@@ -166,9 +165,8 @@ Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* condit } }; - using NodeName = std::string; - std::unordered_map> thenOutputs; - std::unordered_map> elseOutputs; + std::unordered_map> thenOutputs; + std::unordered_map> elseOutputs; std::vector thenReportedOutputs; getReportedOutputs(thenGraph, thenReportedOutputs); @@ -182,14 +180,9 @@ Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* condit = [](IImporterContext* ctx, std::vector& sgOutputs, SubgraphPortsMap& subgraphOutputs, ::ONNX_NAMESPACE::GraphProto const& subgraph, std::vector subgraphLayers, StringMap const& subgraphTensors) { - for (const auto& layer : subgraphLayers) + for (auto const& pair : subgraphOutputs) { - const auto layerName = layer->getName(); - auto iter = findLayer(subgraphOutputs, layerName); - if (iter != subgraphOutputs.end()) - { - sgOutputs.push_back(layer->getOutput(0)); - } + sgOutputs.push_back(pair.first); } if (sgOutputs.empty()) @@ -221,7 +214,7 @@ Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* condit for (size_t i = 0; i < elseSGOutputTensors.size(); i++) { auto* outputLayer = conditional->addOutput(*thenOutputTensors[i], *elseSGOutputTensors[i]); - ctx->registerLayer(outputLayer, std::string(conditional->getName()) + "_OutputLayer"); + ctx->registerLayer(outputLayer, std::string(conditional->getName()) + "_OutputLayer", nullptr); graphOutputs.emplace_back(outputLayer->getOutput(0)); } return Status::success(); @@ -229,7 +222,7 @@ Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* condit // Given a subgraph, find all of its external inputs/outputs (tensors entering/exiting the subgraph). Status getSubgraphTensors(const std::vector& newLayers, - std::unordered_map>& externalOutputs, bool extractOutputs, + std::unordered_map>& externalOutputs, bool extractOutputs, const std::vector* reportedOutputs = nullptr) { using NodeName = std::string; @@ -271,7 +264,7 @@ Status getSubgraphTensors(const std::vector& newLayers, }; // Retrieve the list of tensors either exiting or entering the subgraph. 
- std::unordered_map> externalPortsMap; + std::unordered_map> externalPortsMap; auto filterTensors = [&](TensorsSet const& tensors, auto getNodeAccessor) { for (nvinfer1::ILayer const* l : newLayers) { @@ -307,7 +300,7 @@ Status getSubgraphTensors(const std::vector& newLayers, } if (!reportedOutputs || prefixFound) { - externalPortsMap[tensorName].push_back(std::make_pair(nodeName, i)); + externalPortsMap[tensor].push_back(std::make_pair(nodeName, i)); } } i++; @@ -330,23 +323,23 @@ Status getSubgraphTensors(const std::vector& newLayers, { for (const Port& inPort : input.second) { - auto const nodeName = inPort.first; + auto* tensor = input.first; auto const portIndex = inPort.second; - externalOutputs[nodeName].insert(portIndex); + externalOutputs[tensor].insert(portIndex); } } return Status::success(); } Status getSubgraphOutputs(const std::vector& newLayers, - std::unordered_map>& externalOutputs, + std::unordered_map>& externalOutputs, const std::vector& reportedOutputs) { return getSubgraphTensors(newLayers, externalOutputs, true, &reportedOutputs); } Status getSubgraphInputs(const std::vector& newLayers, - std::unordered_map>& externalInputs) + std::unordered_map>& externalInputs) { return getSubgraphTensors(newLayers, externalInputs, false); } diff --git a/ConditionalHelpers.hpp b/ConditionalHelpers.hpp index fb7d6feb..e4618f9b 100644 --- a/ConditionalHelpers.hpp +++ b/ConditionalHelpers.hpp @@ -19,17 +19,16 @@ namespace onnx2trt { // Given a subgraph, find all of its external inputs (tensors entering the subgraph). -// The result is returned in `subgraphInputs`, which is a map indexed by layer-name and with values indicating a set -// of external input indices. -Status getSubgraphInputs( - const std::vector& newLayers, - std::unordered_map>& subgraphInputs); +// The result is returned in `subgraphInputs`, which is a map indexed by ITensor (a tensor entering the subgraph) and +// with values indicating a set of external input indices. +Status getSubgraphInputs(std::vector const& newLayers, + std::unordered_map>& subgraphInputs); // Given a subgraph, find all of its external outputs (tensors exiting the subgraph). -// The result is returned in `subgraphInputs`, which is a map indexed by layer-name and with values indicating a set -// of external outputs indices. +// The result is returned in `subgraphInputs`, which is a map indexed by ITensor (a tensor exiting the subgraph) and +// with values indicating a set of external outputs indices. 
Status getSubgraphOutputs(const std::vector& newLayers, - std::unordered_map>& subgraphOutputs, + std::unordered_map>& subgraphOutputs, const std::vector& reportedOutputs); // Take a snapshot of the network before and after parsing the subgraph and return a list diff --git a/ImporterContext.cpp b/ImporterContext.cpp index dfd1d684..94d52615 100644 --- a/ImporterContext.cpp +++ b/ImporterContext.cpp @@ -3,6 +3,26 @@ */ #include "ImporterContext.hpp" +#include "NvInferVersion.h" +#include + +#if !defined(_WIN32) +#include +#if defined(__linux__) +#include +#endif +#else // defined(_WIN32) +#include +#endif // !defined(_WIN32) + +#define RT_ASSERT(cond) \ + do \ + { \ + if (!(cond)) \ + { \ + throw std::runtime_error("Assertion " #cond " failed!"); \ + } \ + } while (0) namespace onnx2trt { @@ -89,7 +109,7 @@ void ImporterContext::registerTensor(TensorOrWeights tensor, std::string const& p.first->second = std::move(tensor); } -void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const& basename) +void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const& basename, ::ONNX_NAMESPACE::NodeProto const* node) { // No layer will be added for Constant nodes in ONNX. if (layer) @@ -111,6 +131,138 @@ void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const& mConstantLayers.insert({uniqueName, static_cast(layer)}); } } + if (node != nullptr) + { + processMetadata(*node, layer); + } +} + +void ImporterContext::registerLayer(nvinfer1::ILayer* layer, ::ONNX_NAMESPACE::NodeProto const& node) +{ + std::string const& basename = getNodeName(node); + registerLayer(layer, basename, &node); +} + +namespace +{ + +//! Translates a "logical" library name into an OS-dependent DSO or DLL name +std::string getOSLibraryName(char const* logicalName) +{ + std::stringstream libName; +#if defined(_WIN32) + libName << logicalName << ".dll"; +#else + libName << "lib" << logicalName << ".so." << NV_TENSORRT_SONAME_MAJOR; +#endif + return libName.str(); +} + +//! Platform-agnostic wrapper around dynamic libraries. +class DynamicLibrary +{ +public: + explicit DynamicLibrary(std::string const& name) + : mLibName{name} + { +#if defined(_WIN32) + mHandle = LoadLibraryA(name.c_str()); +#else // defined(_WIN32) + int32_t flags{RTLD_LAZY}; + mHandle = dlopen(name.c_str(), flags); +#endif // defined(_WIN32) + + if (mHandle == nullptr) + { + std::string errorStr{}; +#if !defined(_WIN32) + errorStr = std::string{" due to "} + std::string{dlerror()}; +#endif + throw std::runtime_error("Unable to open library: " + name + errorStr); + } + } + + DynamicLibrary(DynamicLibrary const&) = delete; + DynamicLibrary(DynamicLibrary const&&) = delete; + + ~DynamicLibrary() + { + try + { +#if defined(_WIN32) + RT_ASSERT(static_cast(FreeLibrary(static_cast(mHandle)))); +#else + RT_ASSERT(dlclose(mHandle) == 0); +#endif + } + catch (...) 
+ { + std::cerr << "Unable to close library: " << mLibName << std::endl; + } + } + + std::string getFullPath() const + { + RT_ASSERT(mHandle != nullptr); +#if defined(__linux__) + link_map* linkMap = nullptr; + auto const err = dlinfo(mHandle, RTLD_DI_LINKMAP, &linkMap); + RT_ASSERT(err == 0 && linkMap != nullptr && linkMap->l_name != nullptr); + return std::string{linkMap->l_name}; +#elif defined(_WIN32) + constexpr int32_t kMAX_PATH_LEN{4096}; + std::string path(kMAX_PATH_LEN, '\0'); // since C++11, std::string storage is guaranteed to be contiguous + auto const pathLen = GetModuleFileNameA(static_cast(mHandle), &path[0], kMAX_PATH_LEN); + RT_ASSERT(GetLastError() == ERROR_SUCCESS); + path.resize(pathLen); + path.shrink_to_fit(); + return path; +#else + RT_ASSERT(!"Unsupported operation: getFullPath()"); +#endif + } + +private: + std::string mLibName{}; //!< Name of the DynamicLibrary + void* mHandle{}; //!< Handle to the DynamicLibrary +}; + +//! Translates an OS-dependent DSO/DLL name into a path on the filesystem +std::string getOSLibraryPath(std::string const& osLibName) +{ + DynamicLibrary lib{osLibName}; + return lib.getFullPath(); +} + +} // namespace + +void ImporterContext::addUsedVCPluginLibrary( + ::ONNX_NAMESPACE::NodeProto const& node, char const* pluginName, char const* pluginLib) +{ + auto* ctx = this; // For logging + auto osPluginLibName = getOSLibraryName(pluginLib); + LOG_VERBOSE("Node " << getNodeName(node) << " requires plugin " << pluginName << " which is provided by " + << osPluginLibName); + mLogicalVCPluginLibraries.insert(osPluginLibName); +} + +std::vector ImporterContext::getUsedVCPluginLibraries() +{ + auto* ctx = this; // For logging +#if defined(_WIN32) || defined(__linux__) + std::vector ret; + ret.reserve(mLogicalVCPluginLibraries.size()); + for (auto const& l : mLogicalVCPluginLibraries) + { + auto osLibPath = getOSLibraryPath(l); + LOG_VERBOSE("Library " << l << " located on filesystem as " << osLibPath); + ret.emplace_back(std::move(osLibPath)); + } + return ret; +#else + LOG_WARNING("getUsedVCPluginLibraries not implemented on platform!"); + return {}; +#endif } } // namespace onnx2trt diff --git a/ImporterContext.hpp b/ImporterContext.hpp index 85b51b28..922b53b1 100644 --- a/ImporterContext.hpp +++ b/ImporterContext.hpp @@ -89,6 +89,15 @@ class ImporterContext final : public IImporterContext std::string mOnnxFileLocation; // Keep track of the directory of the parsed ONNX file std::unique_ptr mErrorWrapper; // error recorder to control TRT errors StringMap mConstantLayers; + bool mConvertINT64Logged{false}; + bool mConvertINT64OutOfBoundsLogged{false}; + bool mConvertDoubleLogged{false}; + bool mConvertDoubleOutOfBoundsLogged{false}; + nvonnxparser::OnnxParserFlags mOnnxParserFlags; // OnnxParserFlags specified by the parser + + // Logical library names for VC plugin libraries. This gets translated to library paths + // when getUsedVCPluginLibraries() is called. + std::set mLogicalVCPluginLibraries; //! Stack of names defined by nested ONNX graphs, with information about how to //! restore their associated values when popping back to the surrounding scope. 
@@ -161,7 +170,8 @@ class ImporterContext final : public IImporterContext void registerTensor( TensorOrWeights tensor, std::string const& basename, bool const checkUniqueName = false) override; - void registerLayer(nvinfer1::ILayer* layer, std::string const& basename) override; + void registerLayer(nvinfer1::ILayer* layer, std::string const& basename, ::ONNX_NAMESPACE::NodeProto const* node) override; + void registerLayer(nvinfer1::ILayer* layer, ::ONNX_NAMESPACE::NodeProto const& node) override; nvinfer1::ILogger& logger() override { @@ -266,6 +276,53 @@ class ImporterContext final : public IImporterContext return iter->second; } + void setFlags(nvonnxparser::OnnxParserFlags const& onnxParserFlags) override + { + mOnnxParserFlags = onnxParserFlags; + } + nvonnxparser::OnnxParserFlags getFlags() const override + { + return mOnnxParserFlags; + } + + virtual void addUsedVCPluginLibrary( + ::ONNX_NAMESPACE::NodeProto const& node, char const* pluginName, char const* pluginLib) final; + + virtual std::vector getUsedVCPluginLibraries() final; + + bool isConvertINT64Logged() + { + return mConvertINT64Logged; + } + void setConvertINT64Logged(bool logged) + { + mConvertINT64Logged = logged; + } + bool isConvertINT64OutOfBoundsLogged() + { + return mConvertINT64OutOfBoundsLogged; + } + void setConvertINT64OutOfBoundsLogged(bool logged) + { + mConvertINT64OutOfBoundsLogged = logged; + } + bool isConvertDoubleLogged() + { + return mConvertDoubleLogged; + } + void setConvertDoubleLogged(bool logged) + { + mConvertDoubleLogged = logged; + } + bool isConvertDoubleOutOfBoundsLogged() + { + return mConvertDoubleOutOfBoundsLogged; + } + void setConvertDoubleOutOfBoundsLogged(bool logged) + { + mConvertDoubleOutOfBoundsLogged = logged; + } + private: std::string const& generateUniqueName(std::set& namesSet, const std::string& basename) { diff --git a/ModelImporter.cpp b/ModelImporter.cpp index 720f4ce8..b280a783 100644 --- a/ModelImporter.cpp +++ b/ModelImporter.cpp @@ -20,6 +20,16 @@ namespace onnx2trt { +// Helper class and object to shutdown protobuf library upon library unload. +class ProtobufShutter { + public: + ~ProtobufShutter() + { + google::protobuf::ShutdownProtobufLibrary(); + } +}; + +static ProtobufShutter protobufShutter; // Helper for deserializing INetwork Status setTensorLocations( @@ -209,6 +219,10 @@ Status parseGraph( } } + ASSERT((node.output().size() <= static_cast(outputs.size())) + && "Node has more output tensors than TRT expected.", + ErrorCode::kINVALID_GRAPH); + // Set output names and register outputs with the context. std::ostringstream ssOutputs{}; ssOutputs << nodeName << " [" << node.op_type() << "] outputs: "; @@ -224,6 +238,20 @@ Status parseGraph( { ctx->registerTensor(std::move(output), outputName); } + // UINT8 is only allowed as network inputs and outputs. Therefore any node that produces an UINT8-typed + // output that is not also a graph output is unsupported. 
+ if (output.getType() == "UINT8") + { + bool legalUINT8 = false; + for (auto const& graphOutput : graph.output()) + { + if (graphOutput.name() == outputName) + { + legalUINT8 = true; + } + } + ASSERT(legalUINT8 && "TensorRT does not support UINT8 types for intermediate tensors!", ErrorCode::kUNSUPPORTED_NODE); + } } LOG_VERBOSE(ssOutputs.str()); } @@ -355,13 +383,13 @@ bool ModelImporter::supportsModel(void const* serialized_onnx_model, size_t seri if (status.is_error()) { - _errors.push_back(status); + mErrors.push_back(status); return false; } if (model_path) { - _importer_ctx.setOnnxFileLocation(model_path); + mImporterCtx.setOnnxFileLocation(model_path); } bool allSupported{true}; @@ -393,7 +421,7 @@ bool ModelImporter::supportsModel(void const* serialized_onnx_model, size_t seri } } } - auto* ctx = &_importer_ctx; + auto* ctx = &mImporterCtx; auto checkForInput = [&input_node, &ctx](::ONNX_NAMESPACE::NodeProto const& node) { for (auto input : node.input()) { @@ -474,25 +502,25 @@ bool ModelImporter::supportsOperator(char const* op_name) const bool ModelImporter::parseWithWeightDescriptors(void const* serialized_onnx_model, size_t serialized_onnx_model_size) { - _current_node = -1; + mCurrentNode = -1; // TODO: This function (and its overload below) could do with some cleaning, // particularly wrt error handling. // Note: We store a copy of the model so that weight arrays will persist - _onnx_models.emplace_back(); - ::ONNX_NAMESPACE::ModelProto& model = _onnx_models.back(); + mONNXModels.emplace_back(); + ::ONNX_NAMESPACE::ModelProto& model = mONNXModels.back(); bool is_serialized_as_text = false; Status status = deserialize_onnx_model(serialized_onnx_model, serialized_onnx_model_size, is_serialized_as_text, &model); if (status.is_error()) { - _errors.push_back(status); + mErrors.push_back(status); return false; } status = this->importModel(model); if (status.is_error()) { - status.setNode(_current_node); - _errors.push_back(status); + status.setNode(mCurrentNode); + mErrors.push_back(status); return false; } return true; @@ -500,7 +528,8 @@ bool ModelImporter::parseWithWeightDescriptors(void const* serialized_onnx_model bool ModelImporter::parse(void const* serialized_onnx_model, size_t serialized_onnx_model_size, const char* model_path) { - auto* const ctx = &_importer_ctx; + auto* const ctx = &mImporterCtx; + if (ctx->network()->getNbLayers() > 0) { LOG_ERROR("Parse was called with a non-empty network definition"); @@ -508,17 +537,16 @@ bool ModelImporter::parse(void const* serialized_onnx_model, size_t serialized_o } if (model_path) { - _importer_ctx.setOnnxFileLocation(model_path); + mImporterCtx.setOnnxFileLocation(model_path); } return this->parseWithWeightDescriptors(serialized_onnx_model, serialized_onnx_model_size); } -Status ModelImporter::importModel( - ::ONNX_NAMESPACE::ModelProto const& model) +Status ModelImporter::importModel(::ONNX_NAMESPACE::ModelProto const& model) { - ASSERT(!_importer_ctx.network()->hasImplicitBatchDimension() && "This version of the ONNX parser only supports TensorRT INetworkDefinitions with an explicit batch dimension. Please ensure the network was created using the EXPLICIT_BATCH NetworkDefinitionCreationFlag.", ErrorCode::kINVALID_VALUE); - auto* ctx = &_importer_ctx; - _importer_ctx.clearOpsets(); + ASSERT(!mImporterCtx.network()->hasImplicitBatchDimension() && "This version of the ONNX parser only supports TensorRT INetworkDefinitions with an explicit batch dimension. 
Please ensure the network was created using the EXPLICIT_BATCH NetworkDefinitionCreationFlag.", ErrorCode::kINVALID_VALUE); + auto* ctx = &mImporterCtx; + mImporterCtx.clearOpsets(); #if ENABLE_STD_PLUGIN // Initialize plugin registry initLibNvInferPlugins(static_cast(&ctx->logger()), ""); @@ -531,30 +559,35 @@ Status ModelImporter::importModel( // ONNX spec says that the default domain is either an empty string or is "ai.onnx". if ((domain.empty() || domain == "ai.onnx") && version < 7) { - LOG_WARNING("TensorRT supports ONNX graphs generated with at least opset 7. Models using older opsets are not guaranteed to work."); + LOG_WARNING( + "TensorRT supports ONNX graphs generated with at least opset 7. Models using older opsets are not " + "guaranteed to work."); } - _importer_ctx.addOpset(domain, version); + mImporterCtx.addOpset(domain, version); } ::ONNX_NAMESPACE::GraphProto const& graph = model.graph(); // Create a dummy tensors so that we can reserve output names. If the output names are encountered elsewhere // in the graph, the ctx will know to make the names unique. for (::ONNX_NAMESPACE::ValueInfoProto const& output : graph.output()) { - _importer_ctx.registerTensor(TensorOrWeights{}, output.name()); + mImporterCtx.registerTensor(TensorOrWeights{}, output.name()); } - _current_node = -1; - CHECK(importInputs(&_importer_ctx, graph, &_importer_ctx.tensors())); - CHECK(parseGraph(&_importer_ctx, graph, model.producer_name() == "TensorRT", &_current_node)); + // Propagate OnnxParserFlags down to the importer context. + mImporterCtx.setFlags(getFlags()); + + mCurrentNode = -1; + CHECK(importInputs(&mImporterCtx, graph, &mImporterCtx.tensors())); + CHECK(parseGraph(&mImporterCtx, graph, model.producer_name() == "TensorRT", &mCurrentNode)); - _current_node = -1; + mCurrentNode = -1; // Mark outputs defined in the ONNX model (unless tensors are user-requested) for (::ONNX_NAMESPACE::ValueInfoProto const& output : graph.output()) { - ASSERT((_importer_ctx.tensors().count(output.name())) && "The output tensor was not registered.", + ASSERT((mImporterCtx.tensors().count(output.name())) && "The output tensor was not registered.", ErrorCode::kINVALID_GRAPH); nvinfer1::ITensor* output_tensor_ptr - = &convertToTensor(_importer_ctx.tensors().at(output.name()), &_importer_ctx); + = &convertToTensor(mImporterCtx.tensors().at(output.name()), &mImporterCtx); LOG_VERBOSE("Marking " << output_tensor_ptr->getName() << " as output: " << output.name()); output_tensor_ptr->setName(output.name().c_str()); @@ -563,17 +596,19 @@ Status ModelImporter::importModel( // HACK WAR for TRT not allowing input == output // TODO: Does this break things by changing the name of the input tensor? 
output_tensor_ptr->setName(("__" + output.name()).c_str()); - output_tensor_ptr = &identity(&_importer_ctx, output_tensor_ptr).tensor(); + output_tensor_ptr = &identity(&mImporterCtx, output_tensor_ptr).tensor(); ASSERT(output_tensor_ptr && "Failed to add an Identity layer.", ErrorCode::kUNSUPPORTED_NODE); output_tensor_ptr->setName(output.name().c_str()); } - nvinfer1::ITensor** user_output = _importer_ctx.getUserOutput(output.name().c_str()); + nvinfer1::ITensor** user_output = mImporterCtx.getUserOutput(output.name().c_str()); if (!user_output) { - _importer_ctx.network()->markOutput(*output_tensor_ptr); + mImporterCtx.network()->markOutput(*output_tensor_ptr); nvinfer1::DataType output_trt_dtype; - ASSERT(convertDtype(output.type().tensor_type().elem_type(), &output_trt_dtype) && "Failed to convert ONNX date type to TensorRT data type.", ErrorCode::kUNSUPPORTED_NODE); + ASSERT(convertDtype(output.type().tensor_type().elem_type(), &output_trt_dtype) + && "Failed to convert ONNX data type to TensorRT data type.", + ErrorCode::kUNSUPPORTED_NODE); // For INT32 data type, output type must match tensor type ASSERT( (output_tensor_ptr->getType() != nvinfer1::DataType::kINT32 || output_trt_dtype == nvinfer1::DataType::kINT32) && "For INT32 tensors, the output type must also be INT32.", @@ -583,13 +618,14 @@ Status ModelImporter::importModel( } } // Return user-requested output tensors - for (auto user_output_entry : _importer_ctx.getUserOutputs()) + for (auto user_output_entry : mImporterCtx.getUserOutputs()) { std::string user_output_name = user_output_entry.first; nvinfer1::ITensor** user_output_ptr = user_output_entry.second; - ASSERT( (_importer_ctx.tensors().count(user_output_name)) && "The user-requested output was not registered.", ErrorCode::kINVALID_VALUE); - TensorOrWeights user_output = _importer_ctx.tensors().at(user_output_name); - ASSERT( (user_output.is_tensor()) && "The user-requested output must be a tensor.", ErrorCode::kINVALID_VALUE); + ASSERT((mImporterCtx.tensors().count(user_output_name)) && "The user-requested output was not registered.", ErrorCode::kINVALID_VALUE); + TensorOrWeights user_output = mImporterCtx.tensors().at(user_output_name); + ASSERT((user_output.is_tensor()) && "The user-requested output must be a tensor.", ErrorCode::kINVALID_VALUE); *user_output_ptr = &user_output.tensor(); } @@ -598,25 +634,25 @@ Status ModelImporter::importModel( // iterate over all tensors in the network and add them to "tensors" map string_map tensors; string_map layers; - for (int32_t idx = 0; idx < _importer_ctx.network()->getNbInputs(); ++idx) + for (int32_t idx = 0; idx < mImporterCtx.network()->getNbInputs(); ++idx) { - nvinfer1::ITensor* tensor = _importer_ctx.network()->getInput(idx); + nvinfer1::ITensor* tensor = mImporterCtx.network()->getInput(idx); if (tensor != nullptr) { tensors[tensor->getName()] = tensor; } } - for (int32_t idx = 0; idx < _importer_ctx.network()->getNbOutputs(); ++idx) + for (int32_t idx = 0; idx < mImporterCtx.network()->getNbOutputs(); ++idx) { - nvinfer1::ITensor* tensor = _importer_ctx.network()->getOutput(idx); + nvinfer1::ITensor* tensor = mImporterCtx.network()->getOutput(idx); if (tensor != nullptr) { tensors[tensor->getName()] = tensor; } } - for (int32_t layerIdx = 0; layerIdx < _importer_ctx.network()->getNbLayers(); ++layerIdx) + for (int32_t layerIdx = 0; layerIdx < mImporterCtx.network()->getNbLayers(); ++layerIdx) { - nvinfer1::ILayer* layer = _importer_ctx.network()->getLayer(layerIdx); + nvinfer1::ILayer* layer =
mImporterCtx.network()->getLayer(layerIdx); for (int32_t idx = 0; idx < layer->getNbInputs(); ++idx) { nvinfer1::ITensor* tensor = layer->getInput(idx); @@ -660,12 +696,21 @@ Status ModelImporter::importModel( } } + // Regenerate the plugin library list + mPluginLibraryList = ctx->getUsedVCPluginLibraries(); + mPluginLibraryListCStr.clear(); + mPluginLibraryListCStr.reserve(mPluginLibraryList.size()); + for (auto const& s : mPluginLibraryList) + { + mPluginLibraryListCStr.push_back(s.c_str()); + } + return Status::success(); } bool ModelImporter::parseFromFile(char const* onnxModelFile, int32_t verbosity) { - auto* ctx = &_importer_ctx; + auto* ctx = &mImporterCtx; // Define S_ISREG macro for Windows #if !defined(S_ISREG) @@ -690,7 +735,7 @@ bool ModelImporter::parseFromFile(char const* onnxModelFile, int32_t verbosity) } // Keep track of the absolute path to the ONNX file. - _importer_ctx.setOnnxFileLocation(onnxModelFile); + mImporterCtx.setOnnxFileLocation(onnxModelFile); int64_t const opset_version = (onnx_model.opset_import().size() ? onnx_model.opset_import(0).version() : 0); LOG_INFO("----------------------------------------------------------------"); @@ -737,4 +782,10 @@ bool ModelImporter::parseFromFile(char const* onnxModelFile, int32_t verbosity) return true; } +char const* const* ModelImporter::getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept +{ + nbPluginLibs = mPluginLibraryListCStr.size(); + return (nbPluginLibs > 0) ? mPluginLibraryListCStr.data() : nullptr; +} + } // namespace onnx2trt diff --git a/ModelImporter.hpp b/ModelImporter.hpp index c2647307..b1c91bd2 100644 --- a/ModelImporter.hpp +++ b/ModelImporter.hpp @@ -23,15 +23,19 @@ class ModelImporter : public nvonnxparser::IParser virtual Status importModel(::ONNX_NAMESPACE::ModelProto const& model); private: - ImporterContext _importer_ctx; - std::list<::ONNX_NAMESPACE::ModelProto> _onnx_models; // Needed for ownership of weights - int _current_node; - std::vector _errors; + ImporterContext mImporterCtx; + std::vector mPluginLibraryList; // Array of strings containing plugin libs + std::vector + mPluginLibraryListCStr; // Array of C-strings corresponding to the strings in mPluginLibraryList + std::list<::ONNX_NAMESPACE::ModelProto> mONNXModels; // Needed for ownership of weights + int mCurrentNode; + std::vector mErrors; + nvonnxparser::OnnxParserFlags mOnnxParserFlags{0}; public: ModelImporter(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger) : _op_importers(getBuiltinOpImporterMap()) - , _importer_ctx(network, logger) + , mImporterCtx(network, logger) { } bool parseWithWeightDescriptors(void const* serialized_onnx_model, size_t serialized_onnx_model_size) override; @@ -40,26 +44,53 @@ class ModelImporter : public nvonnxparser::IParser SubGraphCollection_t& sub_graph_collection, const char* model_path = nullptr) override; bool supportsOperator(const char* op_name) const override; + + void setFlags(nvonnxparser::OnnxParserFlags onnxParserFlags) noexcept override + { + mOnnxParserFlags = onnxParserFlags; + } + nvonnxparser::OnnxParserFlags getFlags() const noexcept override + { + return mOnnxParserFlags; + } + + void clearFlag(nvonnxparser::OnnxParserFlag onnxParserFlag) noexcept override + { + mOnnxParserFlags &= ~(1U << static_cast(onnxParserFlag)); + } + + void setFlag(nvonnxparser::OnnxParserFlag onnxParserFlag) noexcept override + { + mOnnxParserFlags |= 1U << static_cast(onnxParserFlag); + } + + bool getFlag(nvonnxparser::OnnxParserFlag onnxParserFlag) const noexcept override + { + 
auto flag = 1U << static_cast(onnxParserFlag); + return static_cast(mOnnxParserFlags & flag); + } + void destroy() override { delete this; } int32_t getNbErrors() const override { - return _errors.size(); + return mErrors.size(); } nvonnxparser::IParserError const* getError(int32_t index) const override { - assert(0 <= index && index < (int32_t) _errors.size()); - return &_errors[index]; + assert(0 <= index && index < (int32_t) mErrors.size()); + return &mErrors[index]; } void clearErrors() override { - _errors.clear(); + mErrors.clear(); } - //...LG: Move the implementation to .cpp bool parseFromFile(char const* onnxModelFile, int32_t verbosity) override; + + virtual char const* const* getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept override; }; } // namespace onnx2trt diff --git a/NvOnnxParser.h b/NvOnnxParser.h index 132c12d9..8913ccf1 100644 --- a/NvOnnxParser.h +++ b/NvOnnxParser.h @@ -19,8 +19,10 @@ #define NV_ONNX_PARSER_MINOR 1 #define NV_ONNX_PARSER_PATCH 0 -static const int NV_ONNX_PARSER_VERSION = ((NV_ONNX_PARSER_MAJOR * 10000) + (NV_ONNX_PARSER_MINOR * 100) + NV_ONNX_PARSER_PATCH); +static constexpr int32_t NV_ONNX_PARSER_VERSION + = ((NV_ONNX_PARSER_MAJOR * 10000) + (NV_ONNX_PARSER_MINOR * 100) + NV_ONNX_PARSER_PATCH); +//! //! \typedef SubGraph_t //! //! \brief The data structure containing the parsing capability of @@ -28,6 +30,7 @@ static const int NV_ONNX_PARSER_VERSION = ((NV_ONNX_PARSER_MAJOR * 10000) + (NV_ //! typedef std::pair, bool> SubGraph_t; +//! //! \typedef SubGraphCollection_t //! //! \brief The data structure containing all SubGraph_t partitioned @@ -44,12 +47,13 @@ namespace nvonnxparser { template -inline int32_t EnumMax(); +constexpr inline int32_t EnumMax(); -/** \enum ErrorCode - * - * \brief the type of parser error - */ +//! +//! \enum ErrorCode +//! +//! \brief The type of error that the parser may return +//! enum class ErrorCode : int { kSUCCESS = 0, @@ -63,140 +67,256 @@ enum class ErrorCode : int kUNSUPPORTED_NODE = 8 }; +//! +//! Maximum number of flags in the ErrorCode enum. +//! +//! \see ErrorCode +//! template <> -inline int32_t EnumMax() +constexpr inline int32_t EnumMax() { return 9; } -/** \class IParserError - * - * \brief an object containing information about an error - */ +//! +//! \brief Represents one or more OnnxParserFlag values using binary OR +//! operations, e.g., 1U << OnnxParserFlag::kVERSION_COMPATIBLE +//! +//! \see IParser::setFlags() and IParser::getFlags() +//! +using OnnxParserFlags = uint32_t; + +enum class OnnxParserFlag : int32_t +{ + //! Parse the ONNX model into the INetworkDefinition with the intention of building a version-compatible engine in + //! TensorRT 8.6. This flag is planned to be deprecated in TensorRT 8.7, and removed in TensorRT 9.0. This will + //! choose TensorRT's native InstanceNormalization implementation over the plugin implementation. There may be + //! performance degradations when this flag is enabled. + kVERSION_COMPATIBLE = 0 +}; + +//! +//! Maximum number of flags in the OnnxParserFlag enum. +//! +//! \see OnnxParserFlag +//! +template <> +constexpr inline int32_t EnumMax() +{ + return 1; +} + +//! +//! \class IParserError +//! +//! \brief an object containing information about an error +//! class IParserError { public: - /** \brief the error code - */ + //! + //!\brief the error code + //! virtual ErrorCode code() const = 0; - /** \brief description of the error - */ + //! + //!\brief description of the error + //! 
virtual const char* desc() const = 0; - /** \brief source file in which the error occurred - */ + //! + //!\brief source file in which the error occurred + //! virtual const char* file() const = 0; - /** \brief source line at which the error occurred - */ + //! + //!\brief source line at which the error occurred + //! virtual int line() const = 0; - /** \brief source function in which the error occurred - */ + //! + //!\brief source function in which the error occurred + //! virtual const char* func() const = 0; - /** \brief index of the ONNX model node in which the error occurred - */ + //! + //!\brief index of the ONNX model node in which the error occurred + //! virtual int node() const = 0; protected: virtual ~IParserError() {} }; -/** \class IParser - * - * \brief an object for parsing ONNX models into a TensorRT network definition - */ +//! +//! \class IParser +//! +//! \brief an object for parsing ONNX models into a TensorRT network definition +//! class IParser { public: - /** \brief Parse a serialized ONNX model into the TensorRT network. - * This method has very limited diagnostics. If parsing the serialized model - * fails for any reason (e.g. unsupported IR version, unsupported opset, etc.) - * it the user responsibility to intercept and report the error. - * To obtain a better diagnostic, use the parseFromFile method below. - * - * \param serialized_onnx_model Pointer to the serialized ONNX model - * \param serialized_onnx_model_size Size of the serialized ONNX model - * in bytes - * \param model_path Absolute path to the model file for loading external weights if required - * \return true if the model was parsed successfully - * \see getNbErrors() getError() - */ - virtual bool parse(void const* serialized_onnx_model, - size_t serialized_onnx_model_size, - const char* model_path = nullptr) + //! + //! \brief Parse a serialized ONNX model into the TensorRT network. + //! This method has very limited diagnostics. If parsing the serialized model + //! fails for any reason (e.g. unsupported IR version, unsupported opset, etc.) + //! it is the user's responsibility to intercept and report the error. + //! To obtain a better diagnostic, use the parseFromFile method below. + //! + //! \param serialized_onnx_model Pointer to the serialized ONNX model + //! \param serialized_onnx_model_size Size of the serialized ONNX model + //! in bytes + //! \param model_path Absolute path to the model file for loading external weights if required + //! \return true if the model was parsed successfully + //! \see getNbErrors() getError() + //! + virtual bool parse( + void const* serialized_onnx_model, size_t serialized_onnx_model_size, const char* model_path = nullptr) + = 0; - /** \brief Parse an onnx model file, which can be a binary protobuf or a text onnx model - * calls parse method inside. - * - * \param File name - * \param Verbosity Level - * - * \return true if the model was parsed successfully - * - */ + //! + //! \brief Parse an onnx model file, which can be a binary protobuf or a text onnx model; + //! calls the parse method internally. + //! + //! \param onnxModelFile Name of the ONNX model file + //! \param verbosity Verbosity level + //! + //! \return true if the model was parsed successfully + //! + //! virtual bool parseFromFile(const char* onnxModelFile, int verbosity) = 0; - /** \brief Check whether TensorRT supports a particular ONNX model. - * If the function returns True, one can proceed to engine building - * without having to call \p parse or \p parseFromFile.
- * - * \param serialized_onnx_model Pointer to the serialized ONNX model - * \param serialized_onnx_model_size Size of the serialized ONNX model - * in bytes - * \param sub_graph_collection Container to hold supported subgraphs - * \param model_path Absolute path to the model file for loading external weights if required - * \return true if the model is supported - */ - virtual bool supportsModel(void const* serialized_onnx_model, - size_t serialized_onnx_model_size, - SubGraphCollection_t& sub_graph_collection, - const char* model_path = nullptr) + //! + //!\brief Check whether TensorRT supports a particular ONNX model. + //! If the function returns True, one can proceed to engine building + //! without having to call \p parse or \p parseFromFile. + //! + //! \param serialized_onnx_model Pointer to the serialized ONNX model + //! \param serialized_onnx_model_size Size of the serialized ONNX model + //! in bytes + //! \param sub_graph_collection Container to hold supported subgraphs + //! \param model_path Absolute path to the model file for loading external weights if required + //! \return true if the model is supported + //! + virtual bool supportsModel(void const* serialized_onnx_model, size_t serialized_onnx_model_size, + SubGraphCollection_t& sub_graph_collection, const char* model_path = nullptr) = 0; - /** \brief Parse a serialized ONNX model into the TensorRT network - * with consideration of user provided weights - * - * \param serialized_onnx_model Pointer to the serialized ONNX model - * \param serialized_onnx_model_size Size of the serialized ONNX model - * in bytes - * \return true if the model was parsed successfully - * \see getNbErrors() getError() - */ - virtual bool parseWithWeightDescriptors( - void const* serialized_onnx_model, size_t serialized_onnx_model_size) - = 0; + //! + //!\brief Parse a serialized ONNX model into the TensorRT network + //! with consideration of user provided weights + //! + //! \param serialized_onnx_model Pointer to the serialized ONNX model + //! \param serialized_onnx_model_size Size of the serialized ONNX model + //! in bytes + //! \return true if the model was parsed successfully + //! \see getNbErrors() getError() + //! + virtual bool parseWithWeightDescriptors(void const* serialized_onnx_model, size_t serialized_onnx_model_size) = 0; - /** \brief Returns whether the specified operator may be supported by the - * parser. - * - * Note that a result of true does not guarantee that the operator will be - * supported in all cases (i.e., this function may return false-positives). - * - * \param op_name The name of the ONNX operator to check for support - */ + //! + //!\brief Returns whether the specified operator may be supported by the + //! parser. + //! + //! Note that a result of true does not guarantee that the operator will be + //! supported in all cases (i.e., this function may return false-positives). + //! + //! \param op_name The name of the ONNX operator to check for support + //! virtual bool supportsOperator(const char* op_name) const = 0; - /** \brief destroy this object - * - * \warning deprecated and planned on being removed in TensorRT 10.0 - */ + + //! + //!\brief destroy this object + //! + //! \warning deprecated and planned on being removed in TensorRT 10.0 + //! TRT_DEPRECATED virtual void destroy() = 0; - /** \brief Get the number of errors that occurred during prior calls to - * \p parse - * - * \see getError() clearErrors() IParserError - */ + + //! 
+ //!\brief Get the number of errors that occurred during prior calls to + //! \p parse + //! + //! \see getError() clearErrors() IParserError + //! virtual int getNbErrors() const = 0; - /** \brief Get an error that occurred during prior calls to \p parse - * - * \see getNbErrors() clearErrors() IParserError - */ + + //! + //!\brief Get an error that occurred during prior calls to \p parse + //! + //! \see getNbErrors() clearErrors() IParserError + //! virtual IParserError const* getError(int index) const = 0; - /** \brief Clear errors from prior calls to \p parse - * - * \see getNbErrors() getError() IParserError - */ + + //! + //!\brief Clear errors from prior calls to \p parse + //! + //! \see getNbErrors() getError() IParserError + //! virtual void clearErrors() = 0; + //! + //! \brief Set the parser flags. + //! + //! The flags are listed in the OnnxParserFlag enum. + //! + //! \param OnnxParserFlag The flags used when parsing an ONNX model. + //! + //! \note This function will override the previous set flags, rather than bitwise ORing the new flag. + //! + //! \see getFlags() + //! + virtual void setFlags(OnnxParserFlags onnxParserFlags) noexcept = 0; + + //! + //! \brief Get the parser flags. Defaults to 0. + //! + //! \return The parser flags as a bitmask. + //! + //! \see setFlags() + //! + virtual OnnxParserFlags getFlags() const noexcept = 0; + + //! + //! \brief clear a parser flag. + //! + //! clears the parser flag from the enabled flags. + //! + //! \see setFlags() + //! + virtual void clearFlag(OnnxParserFlag onnxParserFlag) noexcept = 0; + + //! + //! \brief Set a single parser flag. + //! + //! Add the input parser flag to the already enabled flags. + //! + //! \see setFlags() + //! + virtual void setFlag(OnnxParserFlag onnxParserFlag) noexcept = 0; + + //! + //! \brief Returns true if the parser flag is set + //! + //! \see getFlags() + //! + //! \return True if flag is set, false if unset. + //! + virtual bool getFlag(OnnxParserFlag onnxParserFlag) const noexcept = 0; + virtual ~IParser() noexcept = default; + + //! + //! \brief Query the plugin libraries needed to implement operations used by the parser in a version-compatible + //! engine. + //! + //! This provides a list of plugin libraries on the filesystem needed to implement operations + //! in the parsed network. If you are building a version-compatible engine using this network, + //! provide this list to IBuilderConfig::setPluginsToSerialize to serialize these plugins along + //! with the version-compatible engine, or, if you want to ship these plugin libraries externally + //! to the engine, ensure that IPluginRegistry::loadLibrary is used to load these libraries in the + //! appropriate runtime before deserializing the corresponding engine. + //! + //! \param[out] nbPluginLibs Returns the number of plugin libraries in the array, or -1 if there was an error. + //! \return Array of `nbPluginLibs` C-strings describing plugin library paths on the filesystem if nbPluginLibs > 0, + //! or nullptr otherwise. This array is owned by the IParser, and the pointers in the array are only valid until + //! the next call to parse(), supportsModel(), parseFromFile(), or parseWithWeightDescriptors(). + //! 
+ virtual char const* const* getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept = 0; }; } // namespace nvonnxparser @@ -210,20 +330,21 @@ namespace nvonnxparser namespace { -/** \brief Create a new parser object - * - * \param network The network definition that the parser will write to - * \param logger The logger to use - * \return a new parser object or NULL if an error occurred - * - * Any input dimensions that are constant should not be changed after parsing, - * because correctness of the translation may rely on those constants. - * Changing a dynamic input dimension, i.e. one that translates to -1 in - * TensorRT, to a constant is okay if the constant is consistent with the model. - * Each instance of the parser is designed to only parse one ONNX model once. - * - * \see IParser - */ +//! +//! \brief Create a new parser object +//! +//! \param network The network definition that the parser will write to +//! \param logger The logger to use +//! \return a new parser object or NULL if an error occurred +//! +//! Any input dimensions that are constant should not be changed after parsing, +//! because correctness of the translation may rely on those constants. +//! Changing a dynamic input dimension, i.e. one that translates to -1 in +//! TensorRT, to a constant is okay if the constant is consistent with the model. +//! Each instance of the parser is designed to only parse one ONNX model once. +//! +//! \see IParser +//! inline IParser* createParser(nvinfer1::INetworkDefinition& network, nvinfer1::ILogger& logger) { return static_cast(createNvOnnxParser_INTERNAL(&network, &logger, NV_ONNX_PARSER_VERSION)); diff --git a/README.md b/README.md index 5364e31e..952b789e 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia. ## Supported TensorRT Versions -Development on the `main` branch is for the latest version of [TensorRT 8.5.1](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support. +Development on the `main` branch is for the latest version of [TensorRT 8.6.0](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support. For previous versions of TensorRT, refer to their respective branches. @@ -48,8 +48,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs ### Dependencies - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases) - - [TensorRT 8.5.1](https://developer.nvidia.com/tensorrt) - - [TensorRT 8.5.1 open source libaries (main branch)](https://github.com/NVIDIA/TensorRT/) + - [TensorRT 8.6.0](https://developer.nvidia.com/tensorrt) + - [TensorRT 8.6.0 open source libraries (main branch)](https://github.com/NVIDIA/TensorRT/) ### Building @@ -92,7 +92,7 @@ Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` python3 -m pip install /python/tensorrt-8.x.x.x-cp-none-linux_x86_64.whl -TensorRT 8.5.1 supports ONNX release 1.12.0. +TensorRT 8.6.0 supports ONNX release 1.12.0.
Install it with: python3 -m pip install onnx==1.12.0 diff --git a/TensorOrWeights.hpp b/TensorOrWeights.hpp index 75cfd8c1..032515b8 100644 --- a/TensorOrWeights.hpp +++ b/TensorOrWeights.hpp @@ -107,15 +107,15 @@ class TensorOrWeights { if (is_tensor()) { - switch(_tensor->getType()) + switch (_tensor->getType()) { - case nvinfer1::DataType::kFLOAT:return "FLOAT"; - case nvinfer1::DataType::kHALF: return "HALF"; - case nvinfer1::DataType::kINT8: return "INT8"; - case nvinfer1::DataType::kUINT8: return "UINT8"; - case nvinfer1::DataType::kINT32: return "INT32"; - case nvinfer1::DataType::kBOOL: return "BOOL"; - default: return "UNKNOWN TYPE"; + case nvinfer1::DataType::kFLOAT:return "FLOAT"; + case nvinfer1::DataType::kHALF: return "HALF"; + case nvinfer1::DataType::kINT8: return "INT8"; + case nvinfer1::DataType::kUINT8: return "UINT8"; + case nvinfer1::DataType::kINT32: return "INT32"; + case nvinfer1::DataType::kBOOL: return "BOOL"; + case nvinfer1::DataType::kFP8: return "FP8"; } } else @@ -130,9 +130,9 @@ class TensorOrWeights case ::ONNX_NAMESPACE::TensorProto::BOOL: return "BOOL"; case ::ONNX_NAMESPACE::TensorProto::INT32: return "INT32"; case ::ONNX_NAMESPACE::TensorProto::INT64: return "INT32"; - default: return "UNKNOWN TYPE"; } } + return "UNKNOWN TYPE"; } }; diff --git a/builtin_op_importers.cpp b/builtin_op_importers.cpp index c92edb72..6a3bb3bc 100644 --- a/builtin_op_importers.cpp +++ b/builtin_op_importers.cpp @@ -2,25 +2,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -// Include assert.h prior to the below WAR to ensure that assert() -// definition obeys the NDEBUG define -#include - -// Workaround for NDEBUG causing functional -// differences in ONNX / protobuf code on aarch64 platforms which leads -// to corruption. -#if defined(__aarch64__) && defined(__linux__) && defined(NDEBUG) -#undef NDEBUG -#define REDEFINE_NDEBUG -#endif - -#include - -#if defined(REDEFINE_NDEBUG) -#define NDEBUG -#undef REDEFINE_NDEBUG -#endif - +#include "builtin_op_importers.hpp" #include "ConditionalHelpers.hpp" #include "LoopHelpers.hpp" #include "ModelImporter.hpp" @@ -30,7 +12,6 @@ #include "OnnxAttrs.hpp" #include "RNNHelpers.hpp" #include "ShapeTensor.hpp" -#include "builtin_op_importers.hpp" #include "half.h" #include "onnx2trt_utils.hpp" @@ -89,7 +70,7 @@ using nvinfer1::DataType; do \ { \ nvinfer1::ILayer* layer_ptr = layer; \ - ASSERT(layer_ptr && "Input layer is null.", ErrorCode::kUNSUPPORTED_NODE); \ + ASSERT(layer_ptr && "Input layer is null.", ErrorCode::kUNSUPPORTED_NODE); \ return {{layer_ptr->getOutput(0)}}; \ } while (0) @@ -97,7 +78,7 @@ using nvinfer1::DataType; do \ { \ TensorOrWeights output = identity(ctx, input); \ - ASSERT(output && "Failed to add an identity layer.", ErrorCode::kUNSUPPORTED_NODE); \ + ASSERT(output && "Failed to add an identity layer.", ErrorCode::kUNSUPPORTED_NODE); \ return {{output}}; \ } while (0) @@ -240,7 +221,7 @@ NodeImportResult batchnormFallback( ->getOutput(0), *bias, eOp::kSUM); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -307,26 +288,15 @@ DEFINE_BUILTIN_OP_IMPORTER(Cast) nvinfer1::ITensor& tensor = convertToTensor(inputs.at(0), ctx); OnnxAttrs attrs(node, ctx); // Get data type to cast to. 
- DataType dtype = tensor.getType(); auto onnxType = attrs.get("to"); DataType newType{DataType::kFLOAT}; ASSERT( convertDtype(onnxType, &newType) && "Unsupported data type for the Cast operator!", ErrorCode::kINVALID_NODE); LOG_VERBOSE("Casting to type: " << newType); - // UINT8 type casting is only supported from FP types. - if (dtype == DataType::kUINT8) - { - ASSERT(newType == DataType::kFLOAT || newType == DataType::kHALF, ErrorCode::kUNSUPPORTED_NODE); - } - if (newType == DataType::kUINT8) - { - ASSERT(dtype == DataType::kFLOAT || dtype == DataType::kHALF, ErrorCode::kUNSUPPORTED_NODE); - } // Add the layer. - nvinfer1::IIdentityLayer* layer = ctx->network()->addIdentity(tensor); - layer->setOutputType(0, newType); - ctx->registerLayer(layer, getNodeName(node)); + nvinfer1::ICastLayer* layer = ctx->network()->addCast(tensor, newType); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -385,13 +355,13 @@ DEFINE_BUILTIN_OP_IMPORTER(Celu) ASSERT(elementwiseCheck(tempInputs, eOp::kDIV) && "Elementwise layer does not support the given inputs and operator.", ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* combined = inputTensors.at(0); auto* divLayer = ctx->network()->addElementWise(*combined, *inputTensors.at(3), eOp::kDIV); - ctx->registerLayer(divLayer, getNodeName(node)); + ctx->registerLayer(divLayer, node); ASSERT(divLayer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); combined = divLayer->getOutput(0); // Calculate exp(x/alpha) -> 4 nvinfer1::IUnaryLayer* uLayer = ctx->network()->addUnary(*combined, uOp::kEXP); - ctx->registerLayer(uLayer, getNodeName(node)); + ctx->registerLayer(uLayer, node); combined = uLayer->getOutput(0); inputTensors.push_back(combined); @@ -421,7 +391,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Celu) && "The number of dimensions should remain the same adding inputs.", ErrorCode::kUNSUPPORTED_NODE); auto* layer = ctx->network()->addElementWise(*firstTensor, *secondTensor, op); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); inputTensors.push_back(layer->getOutput(0)); } @@ -580,7 +550,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Concat) int32_t nbDims = inputs.at(0).shape().nbDims; CHECK(convertAxis(axis, nbDims)); auto* layer = ctx->network()->addConcatenation(tensors.data(), tensors.size()); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); layer->setAxis(axis); RETURN_FIRST_OUTPUT(layer); @@ -754,7 +724,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Conv) ErrorCode::kINVALID_NODE); layer->setNbGroups(ngroup); // Register layer name as well as kernel weights and bias weights (if any) - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ctx->network()->setWeightsName(kernelWeights, inputs.at(1).weights().getName()); if (inputs.size() == 3) { @@ -774,6 +744,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Conv) LOG_VERBOSE("Using kernel: " << kernelSize << ", strides: " << strides << ", prepadding: " << begPadding << ", postpadding: " << endPadding << ", dilations: " << dilations << ", numOutputs: " << noutput); LOG_VERBOSE("Convolution output dimensions: " << dims); + return {{tensorPtr}}; } @@ -966,7 +937,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) << "Post-padding: " << endPadding); // Register layer, along with refittable kernel weights and bias weights (if any) - ctx->registerLayer(layer, getNodeName(node)); + 
ctx->registerLayer(layer, node); tensorPtr = layer->getOutput(0); dims = tensorPtr->getDimensions(); @@ -1025,6 +996,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); ASSERT(tensorPtr && "Failed to squeeze tensor.", ErrorCode::kUNSUPPORTED_NODE); } + return {{tensorPtr}}; } @@ -1067,7 +1039,8 @@ DEFINE_BUILTIN_OP_IMPORTER(CumSum) if (exclusive) { auto zero = createZeroTensor(ctx, inputSliced); - std::vector concatTensors = reverse == 1 ? std::vector{input, zero} : std::vector{zero, input}; + std::vector concatTensors = reverse == 1 ? std::vector{input, zero} + : std::vector{zero, input}; auto concat = ctx->network()->addConcatenation(concatTensors.data(), concatTensors.size()); concat->setAxis(axis); @@ -1169,7 +1142,7 @@ DEFINE_BUILTIN_OP_IMPORTER(DepthToSpace) auto* firstShuffle = addShuffle(ctx, *tensorPtr, firstShape); firstShuffle->setSecondTranspose(perm); - ctx->registerLayer(firstShuffle, getNodeName(node)); + ctx->registerLayer(firstShuffle, node); tensorPtr = firstShuffle->getOutput(0); // Finally reshape to {N, C / (blockSize * blockSize), H * blockSize, W * blockSize}; @@ -1182,7 +1155,7 @@ DEFINE_BUILTIN_OP_IMPORTER(DepthToSpace) // This is a helper function for QuantizeLinear/DequantizeLinear NodeImportResult QuantDequantLinearHelper( - IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs, bool isDQ) + IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs, bool isDQ, DataType datatype) { CHECK(notInvalidType(inputs.at(0), {"UINT8"})); @@ -1192,9 +1165,9 @@ NodeImportResult QuantDequantLinearHelper( CHECK(notInvalidType(inputs.at(2), {"UINT8"})); } auto addConstantLayer - = [ctx](nvinfer1::INetworkDefinition& network, ShapedWeights const& weights) -> nvinfer1::ITensor* { + = [ctx, node](nvinfer1::INetworkDefinition& network, ShapedWeights const& weights) -> nvinfer1::ITensor* { nvinfer1::IConstantLayer* constLayer = network.addConstant(weights.shape, weights); - ctx->registerLayer(constLayer, weights.getName()); + ctx->registerLayer(constLayer, weights.getName(), &node); network.setWeightsName(weights, weights.getName()); return constLayer->getOutput(0); }; @@ -1203,7 +1176,9 @@ NodeImportResult QuantDequantLinearHelper( return inputs.at(i).is_weights() && (ctx->getConstantLayer(inputs.at(i).weights().getName()) == nullptr); }; - ASSERT((inputs.size() == 3) && "This version of TensorRT requires 3 inputs for the DequantizeLinear operator.", + // Read the optional quantization axis attribute. Set it to the rank of the input tensor if not provided + ASSERT((inputs.size() >= 2) + && "This version of TensorRT requires at least 2 inputs for the QuantizeLinear/DequantizeLinear operator.", nvonnxparser::ErrorCode::kINVALID_NODE); std::string nodeName = getNodeName(node); @@ -1233,27 +1208,32 @@ NodeImportResult QuantDequantLinearHelper( // Input 2 initializes the layer's zero-point. nvinfer1::ITensor* zeroPointInput = nullptr; - if (newConstantInput(2)) + bool isFP8 = datatype == DataType::kFP8; + if (!isFP8) { - // Zero-point verification. - auto zeroPoint = inputs.at(2).weights(); - ASSERT(shiftIsAllZeros(zeroPoint) && "TRT only supports symmetric quantization - zeroPt must be all zeros", + if (newConstantInput(2)) + { + ASSERT((inputs.size() == 3) && "This version of TensorRT requires 3 inputs for the INT8 QuantizeLinear/DequantizeLinear operator.", + nvonnxparser::ErrorCode::kINVALID_NODE); + // Zero-point verification. 
+ auto zeroPoint = inputs.at(2).weights(); + ASSERT(shiftIsAllZeros(zeroPoint) && "TensorRT only supports symmetric quantization. The zero point for the QuantizeLinear/DequantizeLinear operator must be all zeros.", + nvonnxparser::ErrorCode::kINVALID_NODE); + // Convert the zero-point to float because TRT uses float for zero-point. + auto fpZeroPoint = createZeroShifts(zeroPoint, ::ONNX_NAMESPACE::TensorProto::FLOAT, ctx); + fpZeroPoint.setName(zeroPoint.getName()); + zeroPointInput = addConstantLayer(*ctx->network(), fpZeroPoint); + } + else + { + zeroPointInput = &convertToTensor(inputs.at(2), ctx); + } + auto const zeroPointSize = volume(zeroPointInput->getDimensions()); + // ONNX may represent a scalar using either 0-D or 1-D, so compare sizes instead of shapes. + ASSERT(zeroPointSize == scaleSize && "The scale and zero point must have the same volume.", nvonnxparser::ErrorCode::kINVALID_NODE); - // Convert the zero-point to Float because that's TRT uses float for zero-point. - auto fpZeroPoint = createZeroShifts(zeroPoint, ::ONNX_NAMESPACE::TensorProto::FLOAT, ctx); - fpZeroPoint.setName(zeroPoint.getName()); - zeroPointInput = addConstantLayer(*ctx->network(), fpZeroPoint); } - else - { - zeroPointInput = &convertToTensor(inputs.at(2), ctx); - } - auto const zeroPointSize = volume(zeroPointInput->getDimensions()); - // ONNX may represent a scalar using either 0-D or 1-D, so compare sizes instead of shapes. - ASSERT(zeroPointSize == scaleSize && "The scale and zero-point must have the same size", - nvonnxparser::ErrorCode::kINVALID_NODE); - // Read the optional quantization axis attribute. Set it to the rank of the input tensor if not provided OnnxAttrs attrs(node, ctx); int32_t const nbDims = dataInput.getDimensions().nbDims; int32_t axis = attrs.get("axis", nbDims); @@ -1288,6 +1268,10 @@ NodeImportResult QuantDequantLinearHelper( ASSERT(dq && "Failed to create Dequantize layer.", ErrorCode::kUNSUPPORTED_NODE); dq->setAxis(axis); layer = dq; + if (isFP8) + { + layer->setPrecision(DataType::kFP8); + } } else { @@ -1296,18 +1280,41 @@ NodeImportResult QuantDequantLinearHelper( ASSERT(q && "Failed to create Quantize layer.", ErrorCode::kUNSUPPORTED_NODE); q->setAxis(axis); layer = q; + if (isFP8) + { + layer->setOutputType(0, DataType::kFP8); + } } layer->setName(nodeName.c_str()); - layer->setInput(2, *zeroPointInput); + if (zeroPointInput) + { + layer->setInput(2, *zeroPointInput); + } // Return layer output RETURN_FIRST_OUTPUT(layer); } + +DEFINE_BUILTIN_OP_IMPORTER(QuantizeLinear) +{ + return QuantDequantLinearHelper(ctx, node, inputs, false /*isDQ*/, DataType::kINT8); +} + DEFINE_BUILTIN_OP_IMPORTER(DequantizeLinear) { - return QuantDequantLinearHelper(ctx, node, inputs, true /*isDQ*/); + return QuantDequantLinearHelper(ctx, node, inputs, true /*isDQ*/, DataType::kINT8); +} + +DEFINE_BUILTIN_OP_IMPORTER(TRT_FP8QuantizeLinear) +{ + return QuantDequantLinearHelper(ctx, node, inputs, false /*isDQ*/, DataType::kFP8); +} + +DEFINE_BUILTIN_OP_IMPORTER(TRT_FP8DequantizeLinear) +{ + return QuantDequantLinearHelper(ctx, node, inputs, true /*isDQ*/, DataType::kFP8); } DECLARE_BUILTIN_OP_IMPORTER(Mul); @@ -1402,7 +1409,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Einsum) } auto nbInputs = static_cast(inputTensors.size()); nvinfer1::IEinsumLayer* einsumLayer = ctx->network()->addEinsum(inputTensors.data(), nbInputs, equation.c_str()); - ctx->registerLayer(einsumLayer, getNodeName(node)); + ctx->registerLayer(einsumLayer, node); RETURN_FIRST_OUTPUT(einsumLayer); } @@ -1458,7 +1465,7 @@ 
DEFINE_BUILTIN_OP_IMPORTER(Expand) ShapeTensor const strides = min(ctx, one, sub(ctx, newDims, one)); nvinfer1::ISliceLayer* sliceLayer = addSlice(ctx, newInputTensor, starts, sizes, strides); - ctx->registerLayer(sliceLayer, getNodeName(node)); + ctx->registerLayer(sliceLayer, node); RETURN_FIRST_OUTPUT(sliceLayer); } @@ -1504,7 +1511,7 @@ DEFINE_BUILTIN_OP_IMPORTER(EyeLike) std::memcpy(tempWeights.values, values.data(), values.size() * sizeof(int)); auto* layer = ctx->network()->addConstant(dims, tempWeights); layer->setOutputType(0, DataType::kINT32); - ctx->registerLayer(layer, node.name()); + ctx->registerLayer(layer, node); if (dtype != DataType::kINT32) { @@ -1550,7 +1557,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Gather) CHECK(convertAxis(axis, nbDims)); LOG_VERBOSE("Using Gather axis: " << axis); auto* layer = ctx->network()->addGather(data, indices, axis); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -1569,7 +1576,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GatherElements) auto* layer = ctx->network()->addGatherV2(data, indices, nvinfer1::GatherMode::kELEMENT); layer->setGatherAxis(axis); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -1584,7 +1591,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GatherND) auto* layer = ctx->network()->addGatherV2(data, indices, nvinfer1::GatherMode::kND); layer->setNbElementWiseDims(nbElementWiseDims); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -1620,7 +1627,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Gemm) LOG_VERBOSE("Using opA: " << static_cast(opA) << " opB: " << static_cast(opB)); nvinfer1::IMatrixMultiplyLayer* matmul = ctx->network()->addMatrixMultiply(inputA, opA, inputB, opB); - ctx->registerLayer(matmul, getNodeName(node)); + ctx->registerLayer(matmul, node); + nvinfer1::ITensor* matmulTensor = matmul->getOutput(0); // Scale A*B if needed. @@ -1708,6 +1716,11 @@ DEFINE_BUILTIN_OP_IMPORTER(GreaterOrEqual) /*greater*/ true)}}; } +DEFINE_BUILTIN_OP_IMPORTER(GroupNormalization) +{ + return normalizationHelper(ctx, node, inputs); +} + // singlePassShape is the shape of the output from a single pass. nvinfer1::ITensor* concatenateRNNOutputs(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* singlePassShape, nvinfer1::ITensor* sequenceLength, @@ -1951,7 +1964,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) LOG_VERBOSE("Initial hidden state shape: " << initialHidden->getDimensions()); nvinfer1::IRecurrenceLayer* Ht1 = loop->addRecurrence(*initialHidden); - ctx->registerLayer(Ht1, getNodeName(node)); + ctx->registerLayer(Ht1, node); LOG_VERBOSE("Hidden state shape: " << Ht1->getOutput(0)->getDimensions()); // Compute stackedZR(t) = f(X(t) * W[zr]^T + H(t-1) * R[zr]^T + (Wb[zr] + Rb[zr])). stackedZR(t) has shape @@ -2103,7 +2116,7 @@ DEFINE_BUILTIN_OP_IMPORTER(HardSigmoid) DEFINE_BUILTIN_OP_IMPORTER(Identity) { auto* layer = ctx->network()->addIdentity(convertToTensor(inputs.at(0), ctx)); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -2185,66 +2198,60 @@ DEFINE_BUILTIN_OP_IMPORTER(ImageScaler) // Finally add the scale layer. 
auto layer = ctx->network()->addScale( tensor, nvinfer1::ScaleMode::kCHANNEL, shiftWeights, scaleWeights, nvinfer1::Weights{}); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } DEFINE_BUILTIN_OP_IMPORTER(InstanceNormalization) { - // Scales and biases must be initializers - ASSERT(inputs.at(1).is_weights() && "The scale tensor is required to be an initializer.", - ErrorCode::kUNSUPPORTED_NODE); - ASSERT( - inputs.at(2).is_weights() && "The bias tensor is required to be an initializer.", ErrorCode::kUNSUPPORTED_NODE); - nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); - int32_t nbDims = tensorPtr->getDimensions().nbDims; - ASSERT(nbDims >= 3 && nbDims <= 5 && "TensorRT only supports InstanceNormalization on 3D, 4D, or 5D tensors!", - ErrorCode::kUNSUPPORTED_NODE); - - const bool needToExpandDims = (nbDims == 3); - if (needToExpandDims) + // Choose plugin implementation for non-VC engines, and native implementation + // for VC engines. + auto flags = ctx->getFlags(); + uint32_t vcFlag = 1U << static_cast(nvonnxparser::OnnxParserFlag::kVERSION_COMPATIBLE); + if (flags & vcFlag) { - // Expand spatial dims from 1D to 2D - std::vector const axes{3}; - tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); - ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + return normalizationHelper(ctx, node, inputs); } - auto scale_weights = inputs.at(1).weights(); - auto bias_weights = inputs.at(2).weights(); - OnnxAttrs attrs(node, ctx); - float epsilon = attrs.get("epsilon", 1e-5f); - int32_t const relu{0}; // the ONNX instance norm op does not use the relu parameter - float const alpha{0.f}; // the ONNX instance norm op does not use the alpha parameter - - // Populate instanceNormalization plugin properties. 
- std::string const pluginName = "InstanceNormalization_TRT"; - std::string const pluginVersion = "1"; - std::vector f; - f.emplace_back("epsilon", &epsilon, nvinfer1::PluginFieldType::kFLOAT32, 1); - f.emplace_back("scales", scale_weights.values, nvinfer1::PluginFieldType::kFLOAT32, scale_weights.count()); - f.emplace_back("bias", bias_weights.values, nvinfer1::PluginFieldType::kFLOAT32, bias_weights.count()); - f.emplace_back("relu", &relu, nvinfer1::PluginFieldType::kINT32, 1); - f.emplace_back("alpha", &alpha, nvinfer1::PluginFieldType::kFLOAT32, 1); + return instanceNormPluginHelper(ctx, node, inputs); +} - // Create plugin from registry - auto const plugin = createPlugin(getNodeName(node), importPluginCreator(pluginName, pluginVersion), f); +DEFINE_BUILTIN_OP_IMPORTER(IsInf) +{ + OnnxAttrs attrs{node, ctx}; + int32_t const detectNegative = attrs.get("detect_negative", 1); + int32_t const detectPositive = attrs.get("detect_positive", 1); - ASSERT(plugin != nullptr && "InstanceNormalization plugin was not found in the plugin registry!", - ErrorCode::kUNSUPPORTED_NODE); + if (detectNegative && detectPositive) + { + return unaryHelper(ctx, node, inputs.at(0), nvinfer1::UnaryOperation::kISINF); + } - auto* layer = ctx->network()->addPluginV2(&tensorPtr, 1, *plugin); - ctx->registerLayer(layer, getNodeName(node)); - tensorPtr = layer->getOutput(0); + auto& input = convertToTensor(inputs.at(0), ctx); + auto inputDims = input.getDimensions(); + nvinfer1::Dims scalarDims{inputDims.nbDims}; + std::fill(scalarDims.d, scalarDims.d + scalarDims.nbDims, 1); + auto& zeroTensor = *addConstantScalar(ctx, 0.F, ::ONNX_NAMESPACE::TensorProto::FLOAT, scalarDims)->getOutput(0); - if (needToExpandDims) + if (detectNegative) { - // Un-expand spatial dims back to 1D - std::vector const axes{3}; - tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); - ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + auto* isNeg + = ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kLESS)->getOutput(0); + auto* isInf = ctx->network()->addUnary(input, nvinfer1::UnaryOperation::kISINF)->getOutput(0); + RETURN_FIRST_OUTPUT(ctx->network()->addElementWise(*isNeg, *isInf, nvinfer1::ElementWiseOperation::kAND)); } - - return {{tensorPtr}}; + if (detectPositive) + { + auto* isPos + = ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kGREATER)->getOutput(0); + auto* isInf = ctx->network()->addUnary(input, nvinfer1::UnaryOperation::kISINF)->getOutput(0); + RETURN_FIRST_OUTPUT(ctx->network()->addElementWise(*isPos, *isInf, nvinfer1::ElementWiseOperation::kAND)); + } + // In this case, always return false. + auto* isPos + = ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kGREATER)->getOutput(0); + auto* isNeg + = ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kLESS)->getOutput(0); + RETURN_FIRST_OUTPUT(ctx->network()->addElementWise(*isPos, *isNeg, nvinfer1::ElementWiseOperation::kAND)); } DEFINE_BUILTIN_OP_IMPORTER(IsNaN) @@ -2260,6 +2267,41 @@ DEFINE_BUILTIN_OP_IMPORTER(IsNaN) return unaryHelper(ctx, node, equalRet, nvinfer1::UnaryOperation::kNOT); } +DEFINE_BUILTIN_OP_IMPORTER(LayerNormalization) +{ + auto* input = &convertToTensor(inputs.at(0), ctx); + auto* scale = &convertToTensor(inputs.at(1), ctx); + auto biasType = input->getType() == nvinfer1::DataType::kFLOAT ? 
::ONNX_NAMESPACE::TensorProto::FLOAT + : ::ONNX_NAMESPACE::TensorProto::FLOAT16; + auto* bias + = inputs.size() == 3 ? &convertToTensor(inputs.at(2), ctx) : addConstantScalar(ctx, 0, biasType)->getOutput(0); + + OnnxAttrs attrs(node, ctx); + float epsilon = attrs.get("epsilon", 1e-5f); + int32_t axis = attrs.get("axis", -1); + nvinfer1::DataType computeType = nvinfer1::DataType::kFLOAT; + convertDtype(attrs.get("stash_type", 1), &computeType); + + int32_t const nbDims = input->getDimensions().nbDims; + CHECK(convertAxis(axis, nbDims)); + uint32_t axesMask{0}; + + // Populate axesMask with axis values + for (int32_t i = axis; i < nbDims; i++) + { + axesMask |= 1 << i; + } + + // Broadcast scale and bias to input size + broadcastTensors(ctx, input, scale); + broadcastTensors(ctx, input, bias); + + auto* layer = ctx->network()->addNormalization(*input, *scale, *bias, axesMask); + layer->setEpsilon(epsilon); + layer->setComputePrecision(computeType); + RETURN_FIRST_OUTPUT(layer); +} + DEFINE_BUILTIN_OP_IMPORTER(LeakyRelu) { OnnxAttrs attrs(node, ctx); @@ -2300,7 +2342,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) constexpr int32_t NB_NON_STATE_INPUTS = 2; // First 2 inputs are trip count and condition respectively. constexpr int32_t NB_DISCARDED_OUTPUTS = 1; // First output is the updated value of the condition, and is ignored by the outer loop node. - constexpr int32_t MAX_SCAN_OUTPUT_LENGTH = 1024; // Maximum length for scan outputs if trip count is not set. + constexpr int32_t DUMMY_SCAN_OUTPUT_LENGTH = 1; ASSERT((inputs.size() >= 2) && "The Loop operator requires at least 2 inputs.", ErrorCode::kINVALID_NODE); OnnxAttrs attrs(node, ctx); int32_t const nbInputs = node.input().size(); @@ -2330,7 +2372,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) ctx->loopTensors()[body.input(0).name()] = node.input(0); loop->addTripLimit(*tripLimit, nvinfer1::TripLimit::kCOUNT); // First graph input is iteration_num, so create a loop counter - auto counter = addLoopCounter(ctx, loop, 0); + auto counter = convertToScalar(ctx, addLoopCounter(ctx, loop, 0)); ctx->registerTensor(counter, body.input(0).name()); } } @@ -2349,6 +2391,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) stateVars.emplace_back(loop->addRecurrence(convertToTensor(inputs[i], ctx))); ctx->loopTensors()[body.input(i).name()] = node.input(i); ctx->registerTensor(TensorOrWeights{stateVars.back()->getOutput(0)}, body.input(i).name()); + LOG_VERBOSE("Mapped Loop node input " << node.input(i) << " to loop body input " << body.input(i).name()); } // Loop body @@ -2398,9 +2441,9 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) } else { - trtScanOut->setInput( - 1, *addConstantScalar(ctx, MAX_SCAN_OUTPUT_LENGTH, ::ONNX_NAMESPACE::TensorProto_DataType_INT32) - ->getOutput(0)); + trtScanOut->setInput(1, + *addConstantScalar(ctx, DUMMY_SCAN_OUTPUT_LENGTH, ::ONNX_NAMESPACE::TensorProto_DataType_INT32) + ->getOutput(0)); } nodeOutputs.emplace_back(trtScanOut->getOutput(0)); } @@ -2417,7 +2460,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LRN) float beta = attrs.get("beta", 0.75f); float bias = attrs.get("bias", 1.0f); auto* layer = ctx->network()->addLRN(tensor, size, alpha, beta, bias); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -2537,7 +2580,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM) // H(t-1) nvinfer1::IRecurrenceLayer* Ht1 = loop->addRecurrence(*initialHidden); - ctx->registerLayer(Ht1, getNodeName(node)); + ctx->registerLayer(Ht1, node); LOG_VERBOSE("Hidden state shape: " << Ht1->getOutput(0)->getDimensions()); // C(t-1) @@ -2732,46 
+2775,46 @@ DEFINE_BUILTIN_OP_IMPORTER(LpNormalization) if (p == 1) { // abs(x) nvinfer1::IUnaryLayer* absLayer = ctx->network()->addUnary(*input, uOp::kABS); - ctx->registerLayer(absLayer, getNodeName(node)); + ctx->registerLayer(absLayer, node); norm = absLayer->getOutput(0); // norm coeff = sum(abs(x)) along axis dimension nvinfer1::IReduceLayer* reduceLayer = ctx->network()->addReduce(*norm, rOp::kSUM, 1 << axis, true); - ctx->registerLayer(reduceLayer, getNodeName(node)); + ctx->registerLayer(reduceLayer, node); norm = reduceLayer->getOutput(0); } else if (p == 2) { // x^2 auto* sqrLayer = ctx->network()->addElementWise(*input, *input, eOp::kPROD); - ctx->registerLayer(sqrLayer, getNodeName(node)); + ctx->registerLayer(sqrLayer, node); norm = sqrLayer->getOutput(0); // sum(x^2) along axis dimension nvinfer1::IReduceLayer* reduceLayer = ctx->network()->addReduce(*norm, rOp::kSUM, 1 << axis, true); - ctx->registerLayer(reduceLayer, getNodeName(node)); + ctx->registerLayer(reduceLayer, node); norm = reduceLayer->getOutput(0); // norm coeff = sqrt(sum(x^2)) nvinfer1::IUnaryLayer* sqrtLayer = ctx->network()->addUnary(*norm, uOp::kSQRT); - ctx->registerLayer(sqrtLayer, getNodeName(node)); + ctx->registerLayer(sqrtLayer, node); norm = sqrtLayer->getOutput(0); } // norm coeff |= 1 (change 0s to 1s, leave all other values same) nvinfer1::IElementWiseLayer* maskLayer = ctx->network()->addElementWise(*norm, *zerosTensor, eOp::kEQUAL); - ctx->registerLayer(maskLayer, getNodeName(node)); + ctx->registerLayer(maskLayer, node); nvinfer1::ITensor* mask = maskLayer->getOutput(0); mask = castHelper(ctx, mask, dt); auto* combinedLayer = ctx->network()->addElementWise(*norm, *mask, eOp::kSUM); - ctx->registerLayer(combinedLayer, getNodeName(node)); + ctx->registerLayer(combinedLayer, node); norm = combinedLayer->getOutput(0); // x/(norm coeff) // norm tensor is broadcast along axis dimension to match shape of input auto *layer = ctx->network()->addElementWise( *input, *norm, eOp::kDIV); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); RETURN_FIRST_OUTPUT(layer); @@ -2814,12 +2857,12 @@ DEFINE_BUILTIN_OP_IMPORTER(LpPool) if (p == 1) { // x' = abs(x) nvinfer1::IUnaryLayer* absLayer = ctx->network()->addUnary(*input, uOp::kABS); - ctx->registerLayer(absLayer, getNodeName(node)); + ctx->registerLayer(absLayer, node); output = absLayer->getOutput(0); } else if (p == 2) { // x' = x^2 auto* sqrLayer = ctx->network()->addElementWise(*input, *input, eOp::kPROD); - ctx->registerLayer(sqrLayer, getNodeName(node)); + ctx->registerLayer(sqrLayer, node); output = sqrLayer->getOutput(0); } @@ -2830,19 +2873,19 @@ DEFINE_BUILTIN_OP_IMPORTER(LpPool) poolLayer->setPostPadding(endPadding); poolLayer->setStrideNd(strides); poolLayer->setAverageCountExcludesPadding(exclude_padding); - ctx->registerLayer(poolLayer, getNodeName(node)); + ctx->registerLayer(poolLayer, node); output = poolLayer->getOutput(0); // pool_sum = pool_avg(x')*kernel_size auto* correctedSumLayer = ctx->network()->addElementWise(*output, *kernelSzTensor, eOp::kPROD); - ctx->registerLayer(correctedSumLayer, getNodeName(node)); + ctx->registerLayer(correctedSumLayer, node); output = correctedSumLayer->getOutput(0); // if p == 1, output = pool_sum // if p == 2, output = sqrt(pool_sum) if (p == 2) { nvinfer1::IUnaryLayer* sqrtLayer = ctx->network()->addUnary(*output, uOp::kSQRT); - ctx->registerLayer(sqrtLayer, getNodeName(node)); + 
ctx->registerLayer(sqrtLayer, node); output = sqrtLayer->getOutput(0); } return {{output}}; @@ -2882,7 +2925,7 @@ DEFINE_BUILTIN_OP_IMPORTER(MatMul) nvinfer1::MatrixOperation opB = getMatrixOp(*inputB); nvinfer1::IMatrixMultiplyLayer* matmul = ctx->network()->addMatrixMultiply(*inputA, opA, *inputB, opB); - ctx->registerLayer(matmul, getNodeName(node)); + ctx->registerLayer(matmul, node); auto outputTensor = matmul->getOutput(0); if (needSqueezeHead) @@ -2897,6 +2940,7 @@ DEFINE_BUILTIN_OP_IMPORTER(MatMul) std::vector axes{outputTensor->getDimensions().nbDims - 1}; outputTensor = squeezeTensor(ctx, node, *outputTensor, axes); } + return {{outputTensor}}; } @@ -2965,27 +3009,27 @@ DEFINE_BUILTIN_OP_IMPORTER(MeanVarianceNormalization) // mean(x) along axes direction auto* reduceLayer = ctx->network()->addReduce(*input, rOp::kAVG, axesMask, true); - ctx->registerLayer(reduceLayer, getNodeName(node)); + ctx->registerLayer(reduceLayer, node); auto* meanX = reduceLayer->getOutput(0); // numerator: x-mean(x) auto* numSubLayer = ctx->network()->addElementWise(*input, *meanX, eOp::kSUB); - ctx->registerLayer(numSubLayer, getNodeName(node)); + ctx->registerLayer(numSubLayer, node); auto* numerator = numSubLayer->getOutput(0); // (x-mean(x))^2 auto* sqrLayer = ctx->network()->addElementWise(*numerator, *numerator, eOp::kPROD); - ctx->registerLayer(sqrLayer, getNodeName(node)); + ctx->registerLayer(sqrLayer, node); auto* sqrNumerator = sqrLayer->getOutput(0); // mean((x-mean(x))^2) auto* meanLayer = ctx->network()->addReduce(*sqrNumerator, rOp::kAVG, axesMask, true); - ctx->registerLayer(meanLayer, getNodeName(node)); + ctx->registerLayer(meanLayer, node); auto* variance = meanLayer->getOutput(0); // sqrt(mean((x-mean(x))^2)) nvinfer1::IUnaryLayer* sqrtLayer = ctx->network()->addUnary(*variance, uOp::kSQRT); - ctx->registerLayer(sqrtLayer, getNodeName(node)); + ctx->registerLayer(sqrtLayer, node); auto* stdDev = sqrtLayer->getOutput(0); // denominator: avoid division by zero @@ -2993,12 +3037,12 @@ DEFINE_BUILTIN_OP_IMPORTER(MeanVarianceNormalization) std::fill(scalarShape.d, scalarShape.d + scalarShape.nbDims, 1); auto* epsilonTensor = addConstantScalar(ctx, 1e-9f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, scalarShape)->getOutput(0); auto* addEpsLayer = ctx->network()->addElementWise(*stdDev, *epsilonTensor, eOp::kSUM); - ctx->registerLayer(addEpsLayer, getNodeName(node)); + ctx->registerLayer(addEpsLayer, node); stdDev = addEpsLayer->getOutput(0); // division numerator/standard-deviation auto* divLayer = ctx->network()->addElementWise(*numerator, *stdDev, eOp::kDIV); - ctx->registerLayer(divLayer, getNodeName(node)); + ctx->registerLayer(divLayer, node); ASSERT(divLayer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); @@ -3033,7 +3077,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Mod) // Result = input0 - (input1 * floorDiv(input0, input1)) nvinfer1::IElementWiseLayer* resultLayer = modWithIntegerInputs(ctx, input0, input1, false); - ctx->registerLayer(resultLayer, getNodeName(node)); + ctx->registerLayer(resultLayer, node); RETURN_FIRST_OUTPUT(resultLayer); } // Fmod with integer inputs @@ -3041,7 +3085,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Mod) { // Result = input0 - (input1 * Div(input0, input1)) nvinfer1::IElementWiseLayer* resultLayer = modWithIntegerInputs(ctx, input0, input1, true); - ctx->registerLayer(resultLayer, getNodeName(node)); + ctx->registerLayer(resultLayer, node); RETURN_FIRST_OUTPUT(resultLayer); } // Fmod with floating point inputs @@ -3065,7 +3109,7 @@ 
DEFINE_BUILTIN_OP_IMPORTER(Mod) // If (input0 / input1) >= 0, result = input0 - (input1 * floor(input0 / input1)) // Else result = input0 - (input1 * ceil(input0 / input1)) auto* result = ctx->network()->addSelect(*condition, *outputWithDivFloor, *outputWithDivCeil); - ctx->registerLayer(result, getNodeName(node)); + ctx->registerLayer(result, node); RETURN_FIRST_OUTPUT(result); } } @@ -3134,7 +3178,7 @@ DEFINE_BUILTIN_OP_IMPORTER(NonMaxSuppression) // Create the NMS layer auto* layer = ctx->network()->addNMS(*boxesTensorPtr, *transposedScoresTensorPtr, *maxOutputBoxesPerClassTensorPtr); ASSERT(layer != nullptr && "Failed to create NMS layer.", ErrorCode::kUNSUPPORTED_NODE); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); // Handle the optional threshold inputs if (iouThresholdTensorPtr != nullptr) @@ -3193,12 +3237,7 @@ DEFINE_BUILTIN_OP_IMPORTER(OneHot) CHECK(convertAxis(axis, nbDims+1)); auto* layer = ctx->network()->addOneHot(*indices, *values, *depth, axis); - - auto const outDims = layer->getOutput(0)->getDimensions(); - - ASSERT((outDims.d[axis] != -1) && "OneHot does not support dynamic depth input", ErrorCode::kINVALID_NODE); - - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -3376,7 +3415,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) return MAKE_ERROR("Unsupported pad mode", ErrorCode::kUNSUPPORTED_NODE); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); return {{layer->getOutput(0)}}; } @@ -3402,20 +3441,15 @@ DEFINE_BUILTIN_OP_IMPORTER(PRelu) nvinfer1::ITensor* slopes = &convertToTensor(inputs.at(1), ctx); CHECK(broadcastTensors(ctx, input, slopes)); auto* layer = ctx->network()->addParametricReLU(*input, *slopes); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } -DEFINE_BUILTIN_OP_IMPORTER(QuantizeLinear) -{ - return QuantDequantLinearHelper(ctx, node, inputs, false /*isDQ*/); -} - NodeImportResult randomUniformHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, ShapeTensor const& inputShape, OnnxAttrs const& attrs, DataType const& inputDType) { auto* fillLayer = addFill(ctx, inputShape, nvinfer1::FillOperation::kRANDOM_UNIFORM); - ctx->registerLayer(fillLayer, getNodeName(node)); + ctx->registerLayer(fillLayer, node); // Set datatype of output: // RandomUniform: dype is required and defaults to 1 @@ -3478,7 +3512,7 @@ NodeImportResult randomNormalHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::Nod ShapeTensor const& inputShape, OnnxAttrs const& attrs, DataType const& inputDType) { auto* fillLayer = addFill(ctx, inputShape, nvinfer1::FillOperation::kRANDOM_NORMAL); - ctx->registerLayer(fillLayer, getNodeName(node)); + ctx->registerLayer(fillLayer, node); // Set datatype of output: // RandomNormal: dype is required and defaults to 1 @@ -3569,7 +3603,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Range) ShapeTensor const numberOfElements = max(ctx, sub(ctx, zero, quotient), zero); nvinfer1::IFillLayer* layer = addFill(ctx, convertTo1D(ctx, numberOfElements), nvinfer1::FillOperation::kLINSPACE); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); // TensorRT requires that alpha and beta both be dynamic or both be static. 
if (start.allValuesKnown() && delta.allValuesKnown()) @@ -3703,7 +3737,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) ASSERT((inputRank > 0) && "The input tensor cannot be a scalar.", ErrorCode::kUNSUPPORTED_NODE); // Add resize layer nvinfer1::IResizeLayer* layer = ctx->network()->addResize(input); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); OnnxAttrs attrs(node, ctx); auto mode = attrs.get("mode", "nearest"); @@ -3916,119 +3950,38 @@ DEFINE_BUILTIN_OP_IMPORTER(Reshape) // "A dimension could also be 0, in which case the actual dimension // value is unchanged (i.e. taken from the input tensor)." nvinfer1::IShuffleLayer* layer = addShuffle(ctx, data, shape, /*zeroIsPlaceholder=*/!allowZero); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } DEFINE_BUILTIN_OP_IMPORTER(ReverseSequence) { + ASSERT((inputs.size() == 2) && "ReverseSequence expects two input tensors: input and sequence_lens", + ErrorCode::kINVALID_NODE); CHECK(notInvalidType(inputs.at(0), {"UINT8"})); - OnnxAttrs attrs{node, ctx}; - int32_t const batch_axis = attrs.get("batch_axis", 1); - nvinfer1::ITensor* input = &convertToTensor(inputs.at(0), ctx); - auto const dims = input->getDimensions(); - int32_t const rank = dims.nbDims; - // Sequence tensor: indices tensor of rank = 1 and shape = [batchsize] - nvinfer1::ITensor* sequences = &convertToTensor(inputs.at(1), ctx); - std::vector tensors; - // Determine length of batch axis - int32_t const size = isDynamic(sequences->getDimensions()) ? dims.d[batch_axis] : sequences->getDimensions().d[0]; - ASSERT(size != -1 && "This version of TensorRT does not support dynamic ReverseSequence lengths!", - ErrorCode::kUNSUPPORTED_NODE); - - for (int32_t i = 0; i < size; i++) - { - - /* Slice across each element in batch_axis - - For batch_axis = 1 - Starts = {0, i, 0, 0...} - Sizes = {D0, 1, D2, D3...} - Strides = {1, 1, 1, ...} - - For batch_axis = 0 - Starts = {i, 0, 0, 0...} - Sizes = {1, D1, D2, D3...} - Strides = {1, 1, 1, ...} - */ + nvinfer1::ITensor* sequenceLens = &convertToTensor(inputs.at(1), ctx); + auto const inputDims = input->getDimensions(); + auto const sequenceLensDims = sequenceLens->getDimensions(); + ASSERT((inputDims.nbDims >= 2) && "Rank of input must be at least two", ErrorCode::kUNSUPPORTED_NODE); + ASSERT((sequenceLensDims.nbDims == 1) && "Rank of sequence_lens must be one", ErrorCode::kUNSUPPORTED_NODE); - ShapeTensor starts = batch_axis == 0 ? concat(ctx, shapeVector(i), shapeVector(0)) - : concat(ctx, shapeVector(0), shapeVector(i)); - ShapeTensor sizes = batch_axis == 0 - ? 
concat(ctx, shapeVector(1), ShapeTensor(*getAxisLength(ctx, input, 1, {1, {1}}))) - : concat(ctx, ShapeTensor(*getAxisLength(ctx, input, 0, {1, {1}})), shapeVector(1)); - ShapeTensor strides = fillShapeVector(ctx, 1, shapeVector(rank)); - - for (int32_t j = 2; j < rank; j++) - { - starts = concat(ctx, starts, shapeVector(0)); - sizes = concat(ctx, sizes, ShapeTensor(*getAxisLength(ctx, input, j, {1, {1}}))); - } - - auto s1 = addSlice(ctx, *input, starts, sizes, strides); - nvinfer1::ITensor* data = s1->getOutput(0); - data = squeezeTensor(ctx, node, *data, {batch_axis}); - // Get sequence length for the current slice - auto seqIndex = ctx->network()->addSlice(*sequences, {1, {i}}, {1, {1}}, {1, {1}})->getOutput(0); - - // First slice = slices data[seqIndex - 1 : 0 : -1] on axis 0 - /* - Starts = {seqIndex - 1, 0, 0 ...} - Sizes = {seqIndex, D1, D2, ...} - Strides = {-1, 1, 1, ...} - */ - - int32_t sliceRank = data->getDimensions().nbDims; - starts = sub(ctx, ShapeTensor(*seqIndex), shapeVector(1)); - ShapeTensor startsFill = fillShapeVector(ctx, 0, shapeVector(sliceRank - 1)); - starts = concat(ctx, starts, startsFill); - - sizes = ShapeTensor(*seqIndex); - for (int32_t j = 1; j < sliceRank; j++) - { - sizes = concat(ctx, sizes, ShapeTensor(*getAxisLength(ctx, data, j, {1, {1}}))); - } - - strides = shapeVector(-1); - ShapeTensor stridesFill = fillShapeVector(ctx, 1, shapeVector(sliceRank - 1)); - strides = concat(ctx, strides, stridesFill); - - auto firstSlice = addSlice(ctx, *data, starts, sizes, strides); - auto slice1 = firstSlice->getOutput(0); - - // Second slice = slices data[seqIndex:end:1] on axis 0 - - /* - Starts = {seqIndex, 0, 0 ... 0} - Sizes = {D0 - seqIndex, D1, D2 ...} - Strides = {1, 1, 1, 1 ...} - */ - - starts = ShapeTensor(*seqIndex); - startsFill = fillShapeVector(ctx, 0, shapeVector(sliceRank - 1)); - starts = concat(ctx, starts, startsFill); - - sizes = sub(ctx, ShapeTensor(*getAxisLength(ctx, data, 0, {1, {1}})), ShapeTensor(*seqIndex)); - for (int32_t j = 1; j < sliceRank; j++) - { - sizes = concat(ctx, sizes, ShapeTensor(*getAxisLength(ctx, data, j, {1, {1}}))); - } - - strides = fillShapeVector(ctx, 1, shapeVector(sliceRank)); + OnnxAttrs attrs{node, ctx}; + int32_t const batchAxis = attrs.get("batch_axis", 1); + int32_t const sequenceAxis = attrs.get("time_axis", 0); + ASSERT((batchAxis >= 0 && batchAxis <= inputDims.nbDims) && "Invalid batch_axis", ErrorCode::kUNSUPPORTED_NODE); + ASSERT( + (sequenceAxis >= 0 && sequenceAxis <= inputDims.nbDims) && "Invalid time_axis", ErrorCode::kUNSUPPORTED_NODE); + ASSERT((batchAxis != sequenceAxis) && "batch_axis and time_axis cannot be the same", ErrorCode::kUNSUPPORTED_NODE); - auto secondSlice = addSlice(ctx, *data, starts, sizes, strides); - auto slice2 = secondSlice->getOutput(0); + auto layer = ctx->network()->addReverseSequence(*input, *sequenceLens); + ctx->registerLayer(layer, node); + ASSERT(layer && "Failed to add ReverseSequence layer.", ErrorCode::kUNSUPPORTED_NODE); - // Concat the two slices together - std::vector slices{slice1, slice2}; - auto fullSliceLayer = ctx->network()->addConcatenation(slices.data(), slices.size()); - tensors.emplace_back(unsqueezeTensor(ctx, node, *fullSliceLayer->getOutput(0), {batch_axis})); - } + layer->setBatchAxis(batchAxis); + layer->setSequenceAxis(sequenceAxis); - auto concatLayer = ctx->network()->addConcatenation(tensors.data(), tensors.size()); - concatLayer->setAxis(batch_axis); - RETURN_FIRST_OUTPUT(concatLayer); + RETURN_FIRST_OUTPUT(layer); } 
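The rewritten ReverseSequence importer above hands the whole computation to TensorRT's native reverse-sequence layer instead of the old per-batch slice-and-concat loop. As a reference for what that layer is expected to compute, here is a sketch of the ONNX semantics only (not TensorRT's implementation); the [T, B, D] layout and the function name are illustrative, and the case shown is time_axis = 0, batch_axis = 1:

```cpp
// Reference semantics of ONNX ReverseSequence for a [T, B, D] input with
// time_axis = 0 and batch_axis = 1: for each batch entry b, the first
// seqLens[b] time steps are reversed and the remaining steps are copied as-is.
#include <cstdint>
#include <vector>

std::vector<float> reverseSequenceReference(std::vector<float> const& in, int64_t T, int64_t B,
    int64_t D, std::vector<int64_t> const& seqLens)
{
    std::vector<float> out(in.size());
    for (int64_t b = 0; b < B; ++b)
    {
        int64_t const len = seqLens[b]; // 1 <= len <= T per the ONNX spec
        for (int64_t t = 0; t < T; ++t)
        {
            int64_t const srcT = (t < len) ? (len - 1 - t) : t;
            for (int64_t d = 0; d < D; ++d)
            {
                out[(t * B + b) * D + d] = in[(srcT * B + b) * D + d];
            }
        }
    }
    return out;
}
```

With batch_axis and time_axis swapped the indexing changes accordingly; the importer simply forwards both attributes to setBatchAxis and setSequenceAxis on the new layer.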
DEFINE_BUILTIN_OP_IMPORTER(RNN) @@ -4140,7 +4093,7 @@ DEFINE_BUILTIN_OP_IMPORTER(RNN) // H(t-1) nvinfer1::IRecurrenceLayer* hiddenState = loop->addRecurrence(*initialHidden); - ctx->registerLayer(hiddenState, getNodeName(node)); + ctx->registerLayer(hiddenState, node); LOG_VERBOSE("Hidden state shape: " << hiddenState->getOutput(0)->getDimensions()); // Compute intermediate(t) = (X(t) * W^T + H(t-1) * R^T + (Wb + Rb)). @@ -4247,14 +4200,16 @@ DEFINE_BUILTIN_OP_IMPORTER(RoiAlign) f.emplace_back("spatial_scale", &spatialScale, nvinfer1::PluginFieldType::kFLOAT32, 1); // Create plugin from registry - auto const plugin = createPlugin(getNodeName(node), importPluginCreator(pluginName, pluginVersion), f); + auto const plugin = createPlugin(getNodeName(node), importPluginCreator(ctx, pluginName, pluginVersion), f); - ASSERT(plugin != nullptr && "ROIAlign plugin was not found in the plugin registry!", - ErrorCode::kUNSUPPORTED_NODE); + ASSERT(plugin != nullptr && "ROIAlign plugin was not found in the plugin registry!", ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* const inputTensorsPtr[3] = {tensorPtr, roisPtr, batchIndicesPtr}; auto* layer = ctx->network()->addPluginV2(inputTensorsPtr, 3, *plugin); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); + + // ROIAlign requires nvinfer_vc_plugin when using VC. + ctx->addUsedVCPluginLibrary(node, pluginName.c_str(), "nvinfer_vc_plugin"); RETURN_FIRST_OUTPUT(layer); } @@ -4335,7 +4290,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Scan) stateVars.emplace_back(loop->addRecurrence(convertToTensor(inputs.at(i + opset8Offset), ctx))); ctx->registerTensor(TensorOrWeights{stateVars.back()->getOutput(0)}, body.input(i).name()); } - ctx->registerLayer(stateVars.at(0), getNodeName(node)); + ctx->registerLayer(stateVars.at(0), node); + for (int32_t i = 0; i < nbScanInputs; ++i) { const int32_t index = nbStateVars + i; // Scan Inputs are after the state variables. 
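The RoiAlign hunk above is the first importer to record a version-compatibility plugin dependency through the new addUsedVCPluginLibrary hook, and the earlier InstanceNormalization change switches to the native-layer path when the kVERSION_COMPATIBLE parser flag is set. A minimal sketch of how an application might enable that flag, assuming the TensorRT 8.6 NvOnnxParser API introduced by this patch (error handling omitted):

```cpp
// Minimal sketch: parse an in-memory ONNX model with version-compatible import paths enabled.
// Assumes the OnnxParserFlag enum and IParser::setFlag added in this 8.6 release.
#include <memory>
#include "NvInfer.h"
#include "NvOnnxParser.h"

bool parseVersionCompatible(
    nvinfer1::INetworkDefinition& network, nvinfer1::ILogger& logger, void const* model, size_t modelSize)
{
    std::unique_ptr<nvonnxparser::IParser> parser{nvonnxparser::createParser(network, logger)};
    // Parser flags change the default import behavior; kVERSION_COMPATIBLE steers importers
    // such as InstanceNormalization onto native layers instead of plugins.
    parser->setFlag(nvonnxparser::OnnxParserFlag::kVERSION_COMPATIBLE);
    return parser->parse(model, modelSize);
}
```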
@@ -4400,7 +4356,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GridSample) // Add grid sample layer nvinfer1::IGridSampleLayer* layer = ctx->network()->addGridSample(input, grid); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); OnnxAttrs attrs(node, ctx); auto paddingMode = attrs.get("padding_mode", "zeros"); @@ -4479,7 +4435,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Shape) { nvinfer1::ITensor& input = convertToTensor(inputs.at(0), ctx); auto* layer = ctx->network()->addShape(input); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -4603,7 +4559,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Slice) nvinfer1::ISliceLayer* slice = addSlice(ctx, data, starts, sizes, steps); - ctx->registerLayer(slice, getNodeName(node)); + ctx->registerLayer(slice, node); RETURN_FIRST_OUTPUT(slice); } @@ -4660,7 +4616,7 @@ DEFINE_BUILTIN_OP_IMPORTER(SpaceToDepth) auto* firstShuffle = addShuffle(ctx, *tensorPtr, firstShapeDims); firstShuffle->setSecondTranspose(perm); - ctx->registerLayer(firstShuffle, getNodeName(node)); + ctx->registerLayer(firstShuffle, node); tensorPtr = firstShuffle->getOutput(0); // Reshape to {N, C * blockSize * blockSize, H / blockSize, W / blockSize} @@ -4767,7 +4723,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Split) } nvinfer1::ISliceLayer* slice = addSlice(ctx, inputTensor, starts, sizes, ones); - ctx->registerLayer(slice, getNodeName(node)); + ctx->registerLayer(slice, node); outputs.emplace_back(slice->getOutput(0)); } @@ -4883,7 +4839,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Tile) ShapeTensor outputShape = mul(ctx, inputDims, repeats); nvinfer1::ISliceLayer* tile = addSlice(ctx, input, similar(ctx, inputDims, 0), outputShape, similar(ctx, inputDims, 1)); - ctx->registerLayer(tile, getNodeName(node)); + ctx->registerLayer(tile, node); tile->setMode(nvinfer1::SliceMode::kWRAP); RETURN_FIRST_OUTPUT(tile); @@ -4891,24 +4847,16 @@ DEFINE_BUILTIN_OP_IMPORTER(Tile) DEFINE_BUILTIN_OP_IMPORTER(TopK) { - CHECK(notInvalidType(inputs.at(0), {"INT32", "UINT8"})); + CHECK(notInvalidType(inputs.at(0), {"UINT8"})); nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); OnnxAttrs attrs(node, ctx); int32_t axis = attrs.get("axis", -1); - int32_t k; - if (ctx->getOpsetVersion() >= 10) - { - assertIsWeights(inputs.at(1), "This version of TensorRT only supports input K as an initializer."); - ASSERT((inputs.at(1).weights().count() == 1) && "The input K must contain exactly 1 value.", - ErrorCode::kUNSUPPORTED_NODE); - k = *static_cast(inputs.at(1).weights().values); - } - else + int32_t k{1}; + if (ctx->getOpsetVersion() < 10) { - ASSERT( (attrs.count("k")) && "Attribute k is missing.", ErrorCode::kINVALID_NODE); + ASSERT((attrs.count("k")) && "Attribute k is missing.", ErrorCode::kINVALID_NODE); k = attrs.get("k"); } - int32_t nbDims = tensorPtr->getDimensions().nbDims; CHECK(convertAxis(axis, nbDims)); uint32_t axisMask = 1 << axis; @@ -4921,6 +4869,14 @@ DEFINE_BUILTIN_OP_IMPORTER(TopK) tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); ASSERT(tensorPtr && "Failed to unsqueeze input x.", ErrorCode::kUNSUPPORTED_NODE); } + bool needCast = tensorPtr->getType() == nvinfer1::DataType::kINT32; + if (needCast) + { + LOG_WARNING( + "TensorRT is using FLOAT32 precision to run an INT32 TopK. Rounding errors may occur for large integer " + "values"); + tensorPtr = castHelper(ctx, tensorPtr, nvinfer1::DataType::kFLOAT); + } // Default is top max k. 
auto operation = nvinfer1::TopKOperation::kMAX; @@ -4933,7 +4889,14 @@ DEFINE_BUILTIN_OP_IMPORTER(TopK) } } nvinfer1::ITopKLayer* layer = ctx->network()->addTopK(*tensorPtr, operation, k, axisMask); - ctx->registerLayer(layer, getNodeName(node)); + if (ctx->getOpsetVersion() >= 10) + { + ASSERT((inputs.size() == 2) && "Expects two input tensors for opset >= 10: X and K", ErrorCode::kINVALID_NODE); + nvinfer1::ITensor* kPtr = &convertToTensor(inputs.at(1), ctx); + kPtr = convertToScalar(ctx, kPtr); + layer->setInput(1, *kPtr); + } + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to add TopK layer.", ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* values = layer->getOutput(0); @@ -4949,6 +4912,11 @@ DEFINE_BUILTIN_OP_IMPORTER(TopK) ASSERT(indices && "Failed to squeeze the input indices.", ErrorCode::kUNSUPPORTED_NODE); } + if (needCast) + { + values = castHelper(ctx, values, nvinfer1::DataType::kINT32); + } + return {{values, indices}}; } @@ -5153,7 +5121,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Upsample) } layer->setScales(scale_factors.data(), nbDims); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); layer->setResizeMode(resizeMode); layer->setSelectorForSinglePixel(nvinfer1::ResizeSelector::kFORMULA); layer->setNearestRounding(nvinfer1::ResizeRoundMode::kFLOOR); @@ -5179,7 +5147,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Where) ASSERT( (cDims.nbDims == yDims.nbDims) && "The shape of the condition input tensor must be the same of the input y tensor.", ErrorCode::kUNSUPPORTED_NODE); auto* layer = ctx->network()->addSelect(*condition, *x, *y); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5388,7 +5356,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Shrink) = &elementwiseHelper(ctx, node, {x, biasTensor}, nvinfer1::ElementWiseOperation::kSUM).value().at(0).tensor(); auto* layer = ctx->network()->addSelect(*xLessThanMinusLambd, *xAddBias, *output); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); // cast back to originalType return {{castHelper(ctx, layer->getOutput(0), originalType)}}; @@ -5417,7 +5385,7 @@ DEFINE_BUILTIN_OP_IMPORTER(NonZero) ASSERT((x->getType() == DataType::kFLOAT || x->getType() == DataType::kHALF || x->getType() == DataType::kINT32 || x->getType() == DataType::kINT8 || x->getType() == DataType::kBOOL) && "Only FLOAT, HALF, INT32, INT8 or BOOL input is supported for the NonZero operator in this version of TensorRT", ErrorCode::kUNSUPPORTED_NODE); auto* layer = ctx->network()->addNonZero(*x); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5431,7 +5399,7 @@ DEFINE_BUILTIN_OP_IMPORTER(FallbackPluginImporter) LOG_INFO("Searching for plugin: " << pluginName << ", plugin_version: " << pluginVersion << ", plugin_namespace: " << pluginNamespace); - nvinfer1::IPluginCreator* creator = importPluginCreator(pluginName, pluginVersion, pluginNamespace); + nvinfer1::IPluginCreator* creator = importPluginCreator(ctx, pluginName, pluginVersion, pluginNamespace); ASSERT(creator && "Plugin not found, are the plugin name, version, and namespace correct?", ErrorCode::kUNSUPPORTED_NODE); @@ -5450,7 +5418,7 @@ DEFINE_BUILTIN_OP_IMPORTER(FallbackPluginImporter) } LOG_INFO("Successfully created plugin: " << pluginName); auto* layer = ctx->network()->addPluginV2(pluginInputs.data(), pluginInputs.size(), *plugin); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); 
RETURN_ALL_OUTPUTS(layer); } @@ -5498,7 +5466,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Scale) } nvinfer1::IScaleLayer* layer = ctx->network()->addScale(input, mode, shift, scale, power); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5513,7 +5481,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Shuffle) bool zeroIsPlaceholder = attrs.get("zero_is_placeholder"); nvinfer1::IShuffleLayer* layer = ctx->network()->addShuffle(input); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); layer->setFirstTranspose(perm1); layer->setSecondTranspose(perm2); layer->setZeroIsPlaceholder(zeroIsPlaceholder); @@ -5548,7 +5516,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_TopK_Min) int32_t axes = 1 << (attrs.get("axis")); nvinfer1::ITopKLayer* layer = ctx->network()->addTopK(input, nvinfer1::TopKOperation::kMIN, k, axes); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_ALL_OUTPUTS(layer); } @@ -5565,7 +5533,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_MatMul) nvinfer1::MatrixOperation op1 = attrs.get("op_1"); nvinfer1::IMatrixMultiplyLayer* layer = ctx->network()->addMatrixMultiply(input0, op0, input1, op1); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5600,7 +5568,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_RNNv2) int32_t counter = 1; nvinfer1::IRNNv2Layer* layer = ctx->network()->addRNNv2(input, layerCount, hiddenSize, maxSeqLen, op); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); layer->setInputMode(inputMode); layer->setDirection(direction); @@ -5674,7 +5642,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_RaggedSoftmax) auto& bounds = inputs.at(1).tensor(); nvinfer1::IRaggedSoftMaxLayer* layer = ctx->network()->addRaggedSoftMax(input, bounds); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5699,7 +5667,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_FullyConnected) nvinfer1::IFullyConnectedLayer* layer = ctx->network()->addFullyConnected(input, nbChannels, kernelWeights, biasWeights); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5722,7 +5690,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_MaxAverageBlendPool) nvinfer1::IPoolingLayer* layer = ctx->network()->addPoolingNd(input, nvinfer1::PoolingType::kMAX_AVERAGE_BLEND, kernelSize); ASSERT(layer && "Failed to create a Pooling layer.", ErrorCode::kUNSUPPORTED_NODE); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); layer->setStrideNd(strides); layer->setAverageCountExcludesPadding(exclude_padding); layer->setPaddingMode(paddingMode); @@ -5760,7 +5728,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_PluginV2) auto const plugin = creator->deserializePlugin("", buffer.data(), buffer.size()); nvinfer1::IPluginV2Layer* layer = ctx->network()->addPluginV2(tensors.data(), tensors.size(), *plugin); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_ALL_OUTPUTS(layer); } #endif // ENABLE_STD_PLUGIN @@ -5788,7 +5756,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Gather) } nvinfer1::IGatherLayer* layer = ctx->network()->addGather(data, indices, axis); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); layer->setNbElementWiseDims(nbElementWiseDims); RETURN_FIRST_OUTPUT(layer); } @@ -5818,7 +5786,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Slice) ShapeTensor const stride{ctx, inputs.at(3)}; 
layer = addSlice(ctx, input, start, size, stride); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5829,7 +5797,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Resize) nvinfer1::IResizeLayer* layer; layer = ctx->network()->addResize(input); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); OnnxAttrs attrs(node, ctx); auto const mode = attrs.get("mode"); diff --git a/docs/Changelog.md b/docs/Changelog.md index db420c05..468ae969 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -2,6 +2,23 @@ # ONNX-TensorRT Changelog +# TensorRT 8.6 EA Release - 2023-3-13 + +## Added + +For more details, see the 8.6 EA release notes for new features added in TensorRT 8.6. + +- Added support for `GroupNormalization`, `LayerNormalization`, `IsInf` operations +- Added support for INT32 input types for `Argmin`, `Argmax`, and `TopK` +- Added support for `ReverseSequence` operators with dynamic shapes +- Added support for `TopK` operators with dynamic `K` values +- Added `OnnxParserFlag` enum and `setFlag` interfaces to the ONNX parser to modify the default parsing behavior +- Added metadata tracking: ONNX node metadata will now be embedded into TensorRT layers + +## Changed + +- All cast operations will now use the new `CastLayer` over the previous `IdentityLayer`. + # TensorRT 8.5 GA Release - 2022-11-2 ## Added diff --git a/docs/operators.md b/docs/operators.md index 26ebcb3b..6ab1651d 100644 --- a/docs/operators.md +++ b/docs/operators.md @@ -2,7 +2,7 @@ # Supported ONNX Operators -TensorRT 8.5 supports operators up to Opset 17. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/master/docs/Operators.md) +TensorRT 8.6 supports operators up to Opset 17. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/master/docs/Operators.md) TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, INT8, and BOOL @@ -19,8 +19,8 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | Acosh | Y | FP32, FP16 | | Add | Y | FP32, FP16, INT32 | | And | Y | BOOL | -| ArgMax | Y | FP32, FP16 | -| ArgMin | Y | FP32, FP16 | +| ArgMax | Y | FP32, FP16, INT32 | +| ArgMin | Y | FP32, FP16, INT32 | | Asin | Y | FP32, FP16 | | Asinh | Y | FP32, FP16 | | Atan | Y | FP32, FP16 | @@ -30,7 +30,7 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | Bernoulli | N | | BitShift | N | | BlackmanWindow | N | -| Cast | Y | FP32, FP16, INT32, INT8, BOOL | | +| Cast | Y | FP32, FP16, INT32, INT8, UINT8, BOOL | | | Ceil | Y | FP32, FP16 | | Celu | Y | FP32, FP16 | | Clip | Y | FP32, FP16, INT8 | | @@ -70,7 +70,8 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | GlobalMaxPool | Y | FP32, FP16, INT8 | | Greater | Y | FP32, FP16, INT32 | | GreaterOrEqual | Y | FP32, FP16, INT32 | -| GridSample | Y | FP32, FP16 +| GridSample | Y | FP32, FP16 | +| GroupNormalization | Y | FP32, FP16 | | GRU | Y | FP32, FP16 | For bidirectional GRUs, activation functions must be the same for both the forward and reverse pass | HammingWindow | N | | HannWindow | N | @@ -80,10 +81,10 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT.
| Identity | Y | FP32, FP16, INT32, INT8, BOOL | | If | Y | FP32, FP16, INT32, BOOL | Output tensors of the two conditional branches must have broadcastable shapes, and must have different names | ImageScaler | Y | FP32, FP16 | -| InstanceNormalization | Y | FP32, FP16 | Scales `scale` and biases `B` must be initializers. Input rank must be >=3 & <=5 | -| IsInf | N | +| InstanceNormalization | Y | FP32, FP16 | +| IsInf | Y | FP32, FP16 | IsNaN | Y | FP32, FP16, INT32 | -| LayerNormalization | N | +| LayerNormalization | Y | FP32, FP16 | LeakyRelu | Y | FP32, FP16, INT8 | | Less | Y | FP32, FP16, INT32 | | LessOrEqual | Y | FP32, FP16, INT32 | @@ -143,7 +144,7 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | Relu | Y | FP32, FP16, INT8 | | Reshape | Y | FP32, FP16, INT32, INT8, BOOL | | Resize | Y | FP32, FP16 | Supported resize transformation modes: `half_pixel`, `pytorch_half_pixel`, `tf_half_pixel_for_nn`, `asymmetric`, and `align_corners`.
Supported resize modes: `nearest`, `linear`.
Supported nearest modes: `floor`, `ceil`, `round_prefer_floor`, `round_prefer_ceil` | -| ReverseSequence | Y | FP32, FP16 | Dynamic input shapes are unsupported +| ReverseSequence | Y | FP32, FP16, INT32, INT8, BOOL | | RNN | Y | FP32, FP16 | For bidirectional RNNs, activation functions must be the same for both the forward and reverse pass | RoiAlign | Y | FP32, FP16 | | Round | Y | FP32, FP16, INT8 | @@ -186,7 +187,7 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | TfIdfVectorizer | N | | ThresholdedRelu | Y | FP32, FP16, INT8 | | Tile | Y | FP32, FP16, INT32, BOOL | -| TopK | Y | FP32, FP16 | `K` input must be an initializer +| TopK | Y | FP32, FP16, INT32 | | Transpose | Y | FP32, FP16, INT32, INT8, BOOL | | Trilu | Y | FP32, FP16, INT32, INT8, BOOL | | Unique | N | diff --git a/onnx2trt.hpp b/onnx2trt.hpp index e4783875..4a732c2c 100644 --- a/onnx2trt.hpp +++ b/onnx2trt.hpp @@ -47,9 +47,15 @@ class IImporterContext virtual StringMap& loopTensors() = 0; virtual void setOnnxFileLocation(std::string location) = 0; virtual std::string getOnnxFileLocation() = 0; - virtual void registerTensor(TensorOrWeights tensor, const std::string& basename, bool const checkUniqueName = false) - = 0; - virtual void registerLayer(nvinfer1::ILayer* layer, const std::string& basename) = 0; + virtual void registerTensor(TensorOrWeights tensor, std::string const& basename, bool const checkUniqueName = false) = 0; + + //! Register a layer, which ensures it has a unique name. + //! If node!=nullptr, set the metadata for the layer to the node's name. + virtual void registerLayer(nvinfer1::ILayer* layer, std::string const& basename, ::ONNX_NAMESPACE::NodeProto const* node) = 0; + + //! Short form of register layer to use when the basename is the node's name. + virtual void registerLayer(nvinfer1::ILayer* layer, ::ONNX_NAMESPACE::NodeProto const& node) = 0; + virtual ShapedWeights createTempWeights(ShapedWeights::DataType type, nvinfer1::Dims shape, uint8_t value = 0) = 0; virtual int64_t getOpsetVersion(const char* domain = "") const = 0; virtual nvinfer1::ILogger& logger() = 0; @@ -57,11 +63,25 @@ class IImporterContext virtual nvinfer1::IErrorRecorder* getErrorRecorder() const = 0; virtual nvinfer1::IConstantLayer* getConstantLayer(const char* name) const = 0; + virtual void setFlags(nvonnxparser::OnnxParserFlags const& onnxParserFlags) = 0; + virtual nvonnxparser::OnnxParserFlags getFlags() const = 0; + //! Push a new scope for base names (ONNX names). virtual void pushBaseNameScope() = 0; //! Revert actions of registerTensor for names in the top scope and pop it. virtual void popBaseNameScope() = 0; + + //! Declare the given node requires a plugin library for the given pluginName, which is provided by the + //! logical library name pluginLib (should correspond to the DLL/DSO name with suffix and "lib" prefix stripped, + //! e.g. nvinfer_vc_plugin for libnvinfer_vc_plugin.so.8). + virtual void addUsedVCPluginLibrary( + ::ONNX_NAMESPACE::NodeProto const& node, char const* pluginName, char const* pluginLib) + = 0; + + // Returns a list of strings corresponding to paths to the used VC plugins on disk. May throw on error. 
+ virtual std::vector getUsedVCPluginLibraries() = 0; + protected: virtual ~IImporterContext() {} }; diff --git a/onnx2trt_utils.cpp b/onnx2trt_utils.cpp index e981aa07..b7d19a34 100644 --- a/onnx2trt_utils.cpp +++ b/onnx2trt_utils.cpp @@ -4,6 +4,7 @@ #include "onnx2trt_utils.hpp" #include "OnnxAttrs.hpp" +#include "NvInferSafeRuntime.h" #include namespace onnx2trt @@ -39,7 +40,7 @@ NodeImportResult activationHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE: { layer->setBeta(*beta); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); return {{layer->getOutput(0)}}; } @@ -58,19 +59,29 @@ nvinfer1::ITensor* addClip(IImporterContext* ctx, nvinfer1::ITensor* input, floa NodeImportResult argMinMaxHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, std::vector& inputs, nvinfer1::TopKOperation op) { - CHECK(notInvalidType(inputs.at(0), {"INT32", "UINT8"})); - nvinfer1::ITensor& tensor = convertToTensor(inputs.at(0), ctx); + CHECK(notInvalidType(inputs.at(0), {"UINT8"})); + nvinfer1::ITensor* tensor = &convertToTensor(inputs.at(0), ctx); + + bool needCast = tensor->getType() == nvinfer1::DataType::kINT32; + if (needCast) + { + LOG_WARNING( + "TensorRT is using FLOAT32 precision to run an INT32 ArgMax / ArgMin. Rounding errors may occur for large " + "integer values"); + tensor = castHelper(ctx, tensor, nvinfer1::DataType::kFLOAT); + } + // Get attributes. OnnxAttrs attrs(node, ctx); - int keepdims = attrs.get("keepdims", 1); - int axis = attrs.get("axis", 0); - int selectLastIndex = attrs.get("select_last_index", 0); + int32_t keepdims = attrs.get("keepdims", 1); + int32_t axis = attrs.get("axis", 0); + int32_t selectLastIndex = attrs.get("select_last_index", 0); ASSERT((!selectLastIndex || (selectLastIndex && ctx->getOpsetVersion() >= 12)) && "Per-opset 12 ONNX does not support the select_last_index attribute.", ErrorCode::kUNSUPPORTED_NODE); // Insert a TopK layer with k set to 1. - int nbDims = tensor.getDimensions().nbDims; + int32_t nbDims = tensor->getDimensions().nbDims; CHECK(convertAxis(axis, nbDims)); uint32_t axisMask = 1 << axis; nvinfer1::ITopKLayer* layer; @@ -81,7 +92,7 @@ NodeImportResult argMinMaxHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE:: if (selectLastIndex) { // Need to flip the data input along the given axis using the Slice operator - const auto dims = shapeOf(tensor); + auto const dims = shapeOf(*tensor); ShapeTensor starts = shapeVector(-1); ShapeTensor ends = shapeVector(static_cast(INT_MIN)); ShapeTensor axes = shapeVector(axis); @@ -91,26 +102,27 @@ NodeImportResult argMinMaxHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE:: { // axes specify a subset of the dimensions, or out of order. // Convert starts/ends/steps to complete in-order form. - const ShapeTensor subscripts{axesToInterlaceSubscripts(axes, dims.size())}; + ShapeTensor const subscripts{axesToInterlaceSubscripts(axes, dims.size())}; starts = interlace(ctx, similar(ctx, dims, 0), starts, subscripts); ends = interlace(ctx, dims, ends, subscripts); steps = interlace(ctx, similar(ctx, dims, 1), steps, subscripts); } decodeOnnxStartsAndEnds(ctx, dims, steps, starts, ends); // TensorRT uses sizes of the output dimensions instead of ends. 
- const ShapeTensor sizes = computeSliceSizes(ctx, starts, ends, steps, dims); + ShapeTensor const sizes = computeSliceSizes(ctx, starts, ends, steps, dims); - nvinfer1::ISliceLayer* slice = addSlice(ctx, tensor, starts, sizes, steps); + nvinfer1::ISliceLayer* slice = addSlice(ctx, *tensor, starts, sizes, steps); nvinfer1::ITensor& flippedTensor = *slice->getOutput(0); layer = ctx->network()->addTopK(flippedTensor, op, 1, axisMask); } else { - layer = ctx->network()->addTopK(tensor, op, 1, axisMask); + layer = ctx->network()->addTopK(*tensor, op, 1, axisMask); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); + // We don't care about the TopK values, just the indices. nvinfer1::ITensor* indices = layer->getOutput(1); indices->setType(nvinfer1::DataType::kINT32); @@ -121,7 +133,7 @@ NodeImportResult argMinMaxHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE:: if (selectLastIndex) { // Use shapeTensor semantics to support dynamic shapes - auto const dims = shapeOf(tensor); + auto const dims = shapeOf(*tensor); auto const indicesDims = shapeOf(*indices); auto const axisTensor = shapeVector(axis); auto const dimOnAxis = gather(ctx, dims, axisTensor); @@ -143,7 +155,7 @@ NodeImportResult argMinMaxHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE:: else { // Otherwise, we need to squeeze the axis dimension - std::vector axes{axis}; + std::vector axes{axis}; indices = squeezeTensor(ctx, node, *indices, axes); return {{indices}}; } @@ -310,28 +322,13 @@ nvinfer1::ITensor* castHelper(IImporterContext* ctx, nvinfer1::ITensor* input, n nvinfer1::ITensor* constantOfShape(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, nvinfer1::ITensor* constant, nvinfer1::ITensor* shape) { - int rank = shape->getDimensions().d[0]; - - std::vector starts(rank); - std::fill(starts.begin(), starts.end(), 0); - - nvinfer1::Dims strides{rank}; - std::fill(strides.d, strides.d + strides.nbDims, 0); - - // Slice will not work if constant does not have the same rank as start/size/strides. - nvinfer1::Dims unsqueezeDims{rank}; - std::fill(unsqueezeDims.d, unsqueezeDims.d + unsqueezeDims.nbDims, 1); - nvinfer1::IShuffleLayer* unsqueeze = ctx->network()->addShuffle(*constant); - unsqueeze->setReshapeDimensions(unsqueezeDims); - unsqueeze->setZeroIsPlaceholder(false); - constant = unsqueeze->getOutput(0); - - nvinfer1::ISliceLayer* broadcast = ctx->network()->addSlice(*constant, nvinfer1::Dims{}, nvinfer1::Dims{}, strides); - broadcast->setInput(1, - *addConstant(ctx, starts, ::ONNX_NAMESPACE::TensorProto_DataType_INT32, nvinfer1::Dims{1, rank})->getOutput(0)); - broadcast->setInput(2, *shape); - ctx->registerLayer(broadcast, getNodeName(node)); - return broadcast->getOutput(0); + ShapeTensor shapeT{*shape}; + ShapeTensor zeros = similar(ctx, shapeT, 0); + // `constant` must be broadcasted to the same rank as `shape`. 
+ ShapeTensor broadcastedShape = similar(ctx, shapeT, 1); + constant = &reshape(ctx, *constant, broadcastedShape); + auto l = addSlice(ctx, *constant, zeros, shapeT, zeros); + return l->getOutput(0); } Status convertAxis(int& axis, int nbDims) @@ -369,13 +366,13 @@ bool convertDtype(int32_t onnx_dtype, nvinfer1::DataType* trt_dtype) int32_t* convertINT64(const int64_t* weightValues, nvinfer1::Dims shape, IImporterContext* ctx) { - static bool logged = false; - if (!logged) + auto ctxImpl = static_cast(ctx); + if (!ctxImpl->isConvertINT64Logged()) { LOG_WARNING( "Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. " "Attempting to cast down to INT32."); - logged = true; + ctxImpl->setConvertINT64Logged(true); } const size_t nbWeights = volume(shape); @@ -398,9 +395,10 @@ int32_t* convertINT64(const int64_t* weightValues, nvinfer1::Dims shape, IImport int32Weights[i] = static_cast(weightValues[i]); } } - if (outOfBounds) + if (outOfBounds && !ctxImpl->isConvertINT64OutOfBoundsLogged()) { LOG_WARNING("One or more weights outside the range of INT32 was clamped"); + ctxImpl->setConvertINT64OutOfBoundsLogged(true); } return int32Weights; @@ -507,14 +505,14 @@ int32_t* convertUINT8(const uint8_t* weightValues, nvinfer1::Dims shape, IImport float* convertDouble(const double* weightValues, nvinfer1::Dims shape, IImporterContext* ctx) { - static bool logged = false; - if (!logged) + auto ctxImpl = static_cast(ctx); + if (!ctxImpl->isConvertDoubleLogged()) { LOG_WARNING( "Your ONNX model has been generated with double-typed weights, while TensorRT does not natively support " "double. " "Attempting to cast down to float."); - logged = true; + ctxImpl->setConvertDoubleLogged(true); } const size_t nbWeights = volume(shape); float* floatWeights{ @@ -537,9 +535,10 @@ float* convertDouble(const double* weightValues, nvinfer1::Dims shape, IImporter floatWeights[i] = static_cast(weightValues[i]); } } - if (outOfBounds) + if (outOfBounds && !ctxImpl->isConvertDoubleOutOfBoundsLogged()) { LOG_WARNING("One or more weights outside the range of FLOAT was clamped"); + ctxImpl->setConvertDoubleOutOfBoundsLogged(true); } return floatWeights; @@ -605,42 +604,50 @@ bool convertOnnxWeights( // For weights parsed from external files, createTempWeights is necessary to keep them in scope ShapedWeights externalWeights; + dataPtr = dataBuf.data(); // Cast non-native TRT types to their corresponding proxy types if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::INT64) { - dataPtr = dataBuf.data(); + // Cast INT64 weights to INT32. dataPtr = convertINT64(reinterpret_cast(dataPtr), shape, ctx); nbytes = nbytes / (sizeof(int64_t) / sizeof(int32_t)); onnxDtype = ::ONNX_NAMESPACE::TensorProto::INT32; - externalWeights = ctx->createTempWeights(onnxDtype, shape); - std::memcpy(externalWeights.values, dataPtr, nbytes); } else if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::UINT8) { - dataPtr = dataBuf.data(); + // Cast UINT8 weights to INT32. dataPtr = convertUINT8(reinterpret_cast(dataPtr), shape, ctx); nbytes = nbytes * (sizeof(int32_t) / sizeof(uint8_t)); onnxDtype = ::ONNX_NAMESPACE::TensorProto::INT32; - externalWeights = ctx->createTempWeights(onnxDtype, shape); - std::memcpy(externalWeights.values, dataPtr, nbytes); } else if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::DOUBLE) { - dataPtr = dataBuf.data(); + // Cast DOUBLE weights to FLOAT. 
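// Editorial note (not part of the patch): convertDouble clamps out-of-range values instead of
// failing; assuming the clamping logic elided by the diff context, roughly
//   1e40  -> std::numeric_limits<float>::max()
//  -1e40  -> std::numeric_limits<float>::lowest()
// and with the logged-flags now stored on the ImporterContext (isConvertDoubleLogged, etc.)
// each warning is emitted once per parse rather than once per process, so independent parses
// in the same process each get their own warning.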
dataPtr = convertDouble(reinterpret_cast(dataPtr), shape, ctx); nbytes = nbytes / (sizeof(double) / sizeof(float)); onnxDtype = ::ONNX_NAMESPACE::TensorProto::FLOAT; - externalWeights = ctx->createTempWeights(onnxDtype, shape); - std::memcpy(externalWeights.values, dataPtr, nbytes); } - // Copy weight values directly to externalWeights - else + + // Create the holder for external weights. + externalWeights = ctx->createTempWeights(onnxDtype, shape); + + // Check if the size of external weights is as expected. + if (externalWeights.size_bytes() != nbytes) { - externalWeights = ctx->createTempWeights(onnxDtype, shape); - std::memcpy(externalWeights.values, dataBuf.data(), nbytes); + LOG_ERROR("Unexpected size for the external weights! Expected size: " + << externalWeights.size_bytes() + << " bytes (shape = " + << shape + << "). Actual size: " + << nbytes + << " bytes."); + return false; } + // Copy the weight values into externalWeights. + std::memcpy(externalWeights.values, dataPtr, nbytes); + *weights = externalWeights; return true; } @@ -790,7 +797,7 @@ nvinfer1::ITensor& convertToTensor(TensorOrWeights& input, IImporterContext* ctx // Register layer and constant name (if set) into RefitMap: if (weights.getName()) { - ctx->registerLayer(constantLayer, weights.getName()); + ctx->registerLayer(constantLayer, weights.getName(), nullptr); ctx->network()->setWeightsName(weights, weights.getName()); } return *(constantLayer->getOutput(0)); @@ -900,7 +907,7 @@ NodeImportResult elementwiseHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::Node && "The number of dimensions should remain the same adding inputs.", ErrorCode::kUNSUPPORTED_NODE); auto* layer = ctx->network()->addElementWise(*combined, *tensor, binary_op); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); combined = layer->getOutput(0); } @@ -918,7 +925,7 @@ nvinfer1::ITensor* flattenTensor( nvinfer1::IShuffleLayer* flattenLayer = addShuffle(ctx, tensor, concat(ctx, d0, d1), /*zeroIsPlaceholder=*/false); if (regLayer) { - ctx->registerLayer(flattenLayer, getNodeName(node)); + ctx->registerLayer(flattenLayer, node); } return flattenLayer->getOutput(0); } @@ -1172,7 +1179,7 @@ nvinfer1::ITensor* globalPoolingHelper(IImporterContext* ctx, ::ONNX_NAMESPACE:: // Generate a bitmask of all 1s except the last 2 bits (N and C axes) uint32_t reduceAxes = ((1 << dims.nbDims) - 1) & ~0b11; auto* layer = ctx->network()->addReduce(tensor, op, reduceAxes, /*keepDimensions=*/true); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); return layer->getOutput(0); } @@ -1185,21 +1192,18 @@ nvinfer1::ITensor* greaterLessOrEqual(IImporterContext* ctx, const ::ONNX_NAMESP return result; } -nvinfer1::IPluginCreator* importPluginCreator( - const std::string& pluginName, const std::string& pluginVersion, const std::string& pluginNamespace) +nvinfer1::IPluginCreator* importPluginCreator(IImporterContext* ctx, std::string const& pluginName, + std::string const& pluginVersion, std::string const& pluginNamespace) { nvinfer1::IPluginCreator* creator = nullptr; #if ENABLE_STD_PLUGIN - auto pluginRegistry = nvinfer1::getBuilderPluginRegistry(nvinfer1::EngineCapability::kSTANDARD); - if (pluginRegistry != nullptr) - { - creator = pluginRegistry->getPluginCreator(pluginName.c_str(), pluginVersion.c_str(), pluginNamespace.c_str()); - } + auto& pluginRegistry = ctx->network()->getBuilder().getPluginRegistry(); + creator = 
+        pluginRegistry.getPluginCreator(pluginName.c_str(), pluginVersion.c_str(), pluginNamespace.c_str());
 #endif // ENABLE_STD_PLUGIN
 #if ENABLE_SAFE_PLUGIN
-    auto safetyPluginRegistry = nvinfer1::getBuilderPluginRegistry(nvinfer1::EngineCapability::kSAFETY);
+    auto safetyPluginRegistry = nvinfer1::getBuilderSafePluginRegistry(nvinfer1::EngineCapability::kSAFETY);
     if (creator == nullptr && safetyPluginRegistry != nullptr)
     {
         creator = safetyPluginRegistry->getPluginCreator(
@@ -1230,6 +1234,66 @@ bool isDynamic(const nvinfer1::Dims& shape)
     return std::any_of(shape.d, shape.d + shape.nbDims, [](int dim) { return dim < 0; });
 }
 
+NodeImportResult instanceNormPluginHelper(
+    IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector<TensorOrWeights>& inputs)
+{
+    // Scales and biases must be initializers
+    ASSERT(inputs.at(1).is_weights() && "The scale tensor is required to be an initializer.",
+        ErrorCode::kUNSUPPORTED_NODE);
+    ASSERT(
+        inputs.at(2).is_weights() && "The bias tensor is required to be an initializer.", ErrorCode::kUNSUPPORTED_NODE);
+    nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx);
+    int32_t nbDims = tensorPtr->getDimensions().nbDims;
+    ASSERT(nbDims >= 3 && nbDims <= 5 && "TensorRT only supports InstanceNormalization on 3D, 4D, or 5D tensors!",
+        ErrorCode::kUNSUPPORTED_NODE);
+
+    const bool needToExpandDims = (nbDims == 3);
+    if (needToExpandDims)
+    {
+        // Expand spatial dims from 1D to 2D
+        std::vector<int32_t> const axes{3};
+        tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes);
+        ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE);
+    }
+    auto scale_weights = inputs.at(1).weights();
+    auto bias_weights = inputs.at(2).weights();
+    OnnxAttrs attrs(node, ctx);
+    float epsilon = attrs.get("epsilon", 1e-5F);
+    int32_t const relu{0};  // the ONNX instance norm op does not use the relu parameter
+    float const alpha{0.F}; // the ONNX instance norm op does not use the alpha parameter
+
+    // Populate instanceNormalization plugin properties.
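// Editorial sketch (not part of the patch): the fields assembled below reach the plugin
// creator as a PluginFieldCollection, roughly
//   nvinfer1::PluginFieldCollection fc{static_cast<int32_t>(f.size()), f.data()};
//   nvinfer1::IPluginV2* p = creator->createPlugin(getNodeName(node).c_str(), &fc);
// which is what the createPlugin() helper used a few lines down wraps. Each PluginField
// carries a name, a pointer to the data, a type tag, and an element count, so "scales" and
// "bias" reference the initializer weights without copying them.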
+    std::string const pluginName = "InstanceNormalization_TRT";
+    std::string const pluginVersion = "1";
+    std::vector<nvinfer1::PluginField> f;
+    f.emplace_back("epsilon", &epsilon, nvinfer1::PluginFieldType::kFLOAT32, 1);
+    f.emplace_back("scales", scale_weights.values, nvinfer1::PluginFieldType::kFLOAT32, scale_weights.count());
+    f.emplace_back("bias", bias_weights.values, nvinfer1::PluginFieldType::kFLOAT32, bias_weights.count());
+    f.emplace_back("relu", &relu, nvinfer1::PluginFieldType::kINT32, 1);
+    f.emplace_back("alpha", &alpha, nvinfer1::PluginFieldType::kFLOAT32, 1);
+
+    // Create plugin from registry
+    auto const plugin = createPlugin(getNodeName(node), importPluginCreator(ctx, pluginName, pluginVersion), f);
+
+    ASSERT(plugin != nullptr && "InstanceNormalization plugin was not found in the plugin registry!",
+        ErrorCode::kUNSUPPORTED_NODE);
+
+    auto* layer = ctx->network()->addPluginV2(&tensorPtr, 1, *plugin);
+    ctx->registerLayer(layer, node);
+    tensorPtr = layer->getOutput(0);
+
+    if (needToExpandDims)
+    {
+        // Un-expand spatial dims back to 1D
+        std::vector<int32_t> const axes{3};
+        tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes);
+        ASSERT(tensorPtr && "Failed to squeeze tensor.", ErrorCode::kUNSUPPORTED_NODE);
+    }
+
+    return {{tensorPtr}};
+}
+
 nvinfer1::ITensor* iota(IImporterContext* ctx, ShapeTensor iotaDims, int32_t axis)
 {
     std::vector<int64_t> deltaVals(iotaDims.size(), 0);
@@ -1549,6 +1613,49 @@ nvinfer1::Dims makeDims(int nbDims, int val)
     return dims;
 }
 
+NodeImportResult normalizationHelper(
+    IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, std::vector<TensorOrWeights>& inputs)
+{
+    auto* input = &convertToTensor(inputs.at(0), ctx);
+    auto* scale = &convertToTensor(inputs.at(1), ctx);
+    auto* bias = &convertToTensor(inputs.at(2), ctx);
+
+    OnnxAttrs attrs(node, ctx);
+    float epsilon = attrs.get("epsilon", 1e-5f);
+    int32_t nbGroups = attrs.get("num_groups", 1);
+
+    auto nbDims = input->getDimensions().nbDims;
+    ASSERT(nbDims >= 3 && "Input to normalization should be at least 3D!", ErrorCode::kINVALID_NODE);
+
+    // Need to broadcast scale and bias to the input shape. Note that normal broadcasting rules cannot be applied
+    // as scale and bias are 1D and need to be broadcasted to shape [1, S, 1, 1, ...].
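// Editorial worked example (not part of the patch): for a 4-D NCHW input (nbDims = 4) the
// loop below skips i == 1 (the channel axis that scale/bias already cover), giving
//   unsqueezeAxes = {0, 2, 3}  -> a 1-D scale of shape [S] becomes [1, S, 1, 1]
//   axesMask      = (1 << 2) | (1 << 3) = 0b1100
// so the INormalizationLayer reduces over the spatial axes H and W, matching
// InstanceNormalization; GroupNormalization reuses the same helper and differs only in the
// setNbGroups() call further down.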
+    uint32_t axesMask{0};
+    std::vector<int32_t> unsqueezeAxes;
+
+    for (int32_t i = 0; i < nbDims; i++)
+    {
+        if (i == 1)
+        {
+            continue;
+        }
+        // Axes should correspond to the spatial dimensions
+        if (i >= 2)
+        {
+            axesMask |= 1 << i;
+        }
+        unsqueezeAxes.push_back(i);
+    }
+
+    scale = unsqueezeTensor(ctx, node, *scale, unsqueezeAxes);
+    bias = unsqueezeTensor(ctx, node, *bias, unsqueezeAxes);
+
+    auto* layer = ctx->network()->addNormalization(*input, *scale, *bias, axesMask);
+    layer->setEpsilon(epsilon);
+    layer->setNbGroups(nbGroups);
+    ctx->registerLayer(layer, node);
+    return {{layer->getOutput(0)}};
+}
+
 nvinfer1::Dims insertDimension(const nvinfer1::Dims& dims, const int axis, const int value)
 {
     assert(dims.nbDims < nvinfer1::Dims::MAX_DIMS && axis < nvinfer1::Dims::MAX_DIMS);
@@ -1682,7 +1789,7 @@ NodeImportResult poolingHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProt
     poolingLayer->setPrePadding(beg_padding);
     poolingLayer->setPostPadding(end_padding);
 
-    ctx->registerLayer(poolingLayer, getNodeName(node));
+    ctx->registerLayer(poolingLayer, node);
     tensorPtr = poolingLayer->getOutput(0);
     dims = tensorPtr->getDimensions();
     if (needToExpandDims)
@@ -1726,7 +1833,7 @@ NodeImportResult reduceTensor(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto
     }
 
     auto* layer = ctx->network()->addReduce(tensor, operation, axisMask, keepdims);
-    ctx->registerLayer(layer, getNodeName(node));
+    ctx->registerLayer(layer, node);
     return {{layer->getOutput(0)}};
 }
 
@@ -1776,7 +1883,7 @@ NodeImportResult scaleHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::Node
     auto* layer = ctx->network()->addScaleNd(*tensorPtr, mode, shift, scale, power, 1);
     ASSERT(layer && "Failed to add a Scale layer.", ErrorCode::kUNSUPPORTED_NODE);
     // Register layer name, and shift and scale weight names for the refit map.
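// Editorial sketch (not part of the patch; `engine`, `logger` and `newShiftWeights` are
// assumed to exist): the names registered with setWeightsName() are what a caller would
// later pass to the refitter of a refittable engine, e.g.
//   nvinfer1::IRefitter* refitter = nvinfer1::createInferRefitter(*engine, logger);
//   refitter->setNamedWeights(shiftName.c_str(), newShiftWeights);
//   refitter->refitCudaEngine();
// which lets the Scale shift/scale weights be updated without re-parsing the ONNX model.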
- ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ctx->network()->setWeightsName(shift, shiftName); ctx->network()->setWeightsName(scale, scaleName); @@ -1787,7 +1894,6 @@ NodeImportResult scaleHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::Node tensorPtr = &reshape(ctx, *tensorPtr, origShape); ASSERT(tensorPtr && "Failed to reshape tensor.", ErrorCode::kUNSUPPORTED_NODE); } - return {{tensorPtr}}; } @@ -1858,7 +1964,7 @@ nvinfer1::ITensor* squeezeTensor(IImporterContext* ctx, const ::ONNX_NAMESPACE:: nvinfer1::IShuffleLayer* squeezeLayer = addShuffle(ctx, tensor, newDims); if (regLayer) { - ctx->registerLayer(squeezeLayer, getNodeName(node)); + ctx->registerLayer(squeezeLayer, node); } return squeezeLayer->getOutput(0); } @@ -1869,7 +1975,7 @@ nvinfer1::ITensor* transposeTensor(IImporterContext* ctx, const ::ONNX_NAMESPACE const nvinfer1::Dims shape = tensor.getDimensions(); nvinfer1::IShuffleLayer* layer = ctx->network()->addShuffle(tensor); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); if (!layer) { return nullptr; @@ -1899,7 +2005,6 @@ NodeImportResult unaryHelper( IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, TensorOrWeights& input, nvinfer1::UnaryOperation op) { nvinfer1::ITensor* tensorPtr = &convertToTensor(input, ctx); - const auto rank = tensorPtr->getDimensions().nbDims; const auto inputType = tensorPtr->getType(); bool validUnaryType = true; @@ -1930,6 +2035,11 @@ NodeImportResult unaryHelper( validUnaryType = (inputType != nvinfer1::DataType::kBOOL && inputType != nvinfer1::DataType::kUINT8); break; } + case nvinfer1::UnaryOperation::kISINF: + { + validUnaryType = (inputType == nvinfer1::DataType::kFLOAT || inputType == nvinfer1::DataType::kHALF); + break; + } default: { // By default TRT does not support BOOL, INT32, UINT8 types for Unary operations. @@ -1941,24 +2051,10 @@ NodeImportResult unaryHelper( && "This version of TensorRT does not support the given operator with the given input data type.", ErrorCode::kUNSUPPORTED_NODE); - // Support scalar inputs by unsqueezing to 1D - if (rank == 0) - { - std::vector axes{0}; - tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); - } - nvinfer1::IUnaryLayer* layer = ctx->network()->addUnary(*tensorPtr, op); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); tensorPtr = layer->getOutput(0); - // Squeeze scalar inputs back into a scalar - if (rank == 0) - { - std::vector axes{0}; - tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); - } - return {{tensorPtr}}; } @@ -2074,7 +2170,7 @@ NodeImportResult convMultiInput( { layer->setInput(2, *bias_tensor_ptr); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); nvinfer1::ITensor* output_tensor_ptr = layer->getOutput(0); if (needToExpandDims) @@ -2113,7 +2209,7 @@ nvinfer1::ITensor* unsqueezeTensor(IImporterContext* ctx, const ::ONNX_NAMESPACE nvinfer1::IShuffleLayer* unsqueezeLayer = addShuffle(ctx, tensor, newDims); if (regLayer) { - ctx->registerLayer(unsqueezeLayer, getNodeName(node)); + ctx->registerLayer(unsqueezeLayer, node); } return unsqueezeLayer->getOutput(0); } @@ -2246,7 +2342,7 @@ nvinfer1::ITensor* addSoftmax(IImporterContext* ctx, const ::ONNX_NAMESPACE::Nod // ONNX softmax is always on second dimension. 
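// Editorial note (not part of the patch): ISoftMaxLayer::setAxes() takes a bitmask in which
// bit i selects dimension i, so `1 << 1` below reduces over dimension 1. In the pre-opset-13
// path the importer has already arranged the data so that the softmax axis is the second
// dimension, which is why this branch can hard-code the mask.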
softMax->setAxes(1 << 1); } - ctx->registerLayer(softMax, node.name()); + ctx->registerLayer(softMax, node); return softMax->getOutput(0); } @@ -2285,16 +2381,16 @@ NodeImportResult addScatterLayer(IImporterContext* ctx, ::ONNX_NAMESPACE::NodePr auto* layer = ctx->network()->addScatter(data, indices, updates, mode); layer->setAxis(axis); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); return {{layer->getOutput(0)}}; } -//! Helper function to calculate mod(A, B) +//! Helper function to calculate mod(A, B) nvinfer1::IElementWiseLayer* modWithIntegerInputs(IImporterContext* ctx, nvinfer1::ITensor* input0, nvinfer1::ITensor* input1, bool fmod){ using eOp = nvinfer1::ElementWiseOperation; auto divOp = eOp::kFLOOR_DIV; if (fmod) divOp = eOp::kDIV; - + // input0 - (input1 * divOp(input0, input1)) return ctx->network()->addElementWise(*input0, *ctx->network()->addElementWise(*input1, @@ -2334,4 +2430,60 @@ float* convertFP16Data(void* weightValues, nvinfer1::Dims shape, IImporterContex return newWeights; } +std::string filterDocString(std::string const& docString) +{ + auto splitString = [](auto const& docString) { + std::vector lines; + + std::stringstream ss(docString); + std::string line; + while (std::getline(ss, line, '\n')) + { + lines.push_back(line); + } + + return lines; + }; + + std::ostringstream filteredDocStream; + // The doc strings that PyTorch lib generates contain the literal `site-packages` or `dist-packages`. + // We filter such lines out to keep only the doc strings of the user-programmed codes. + std::vector patterns{"site-packages", "dist-packages"}; + std::vector lines = splitString(docString); + for (auto &line: lines) { + bool writeLine = true; + for (auto &pattern : patterns) + { + if (line.find(pattern) != std::string::npos) + { + writeLine = false; + break; + } + } + if (writeLine) + { + // A double-quote substring in a line breaks the JSON format. + // For that reason, we change it to a single-quote substring, if any. 
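// Editorial worked example (not part of the patch; the file names and node name are made up):
// given a PyTorch-generated doc_string such as
//   File "/usr/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 443
//   File "model.py", line 10, in forward
// the site-packages line is dropped and the surviving line has its double quotes replaced by
// single quotes, so processMetadata() further down attaches something like
//   [ONNX Layer: Conv_0 | File 'model.py', line 10, in forward]
// to the TensorRT layer via ILayer::setMetadata().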
+ std::replace(line.begin(), line.end(), '\"', '\''); + filteredDocStream << " | " << line; + } + } + return filteredDocStream.str(); +} + +Status processMetadata(::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::ILayer* layer) +{ + std::string docString = node.doc_string(); + std::string filteredDocString = "[ONNX Layer: " + getNodeName(node); + + if (docString.size() != 0) + { + filteredDocString += filterDocString(docString); + } + filteredDocString += "]"; + + ASSERT((layer != nullptr) && "The layer object does not exist.", ErrorCode::kUNSUPPORTED_NODE); + layer->setMetadata(filteredDocString.c_str()); + return Status::success(); +} } // namespace onnx2trt diff --git a/onnx2trt_utils.hpp b/onnx2trt_utils.hpp index fbb7ba63..7cafa9e9 100644 --- a/onnx2trt_utils.hpp +++ b/onnx2trt_utils.hpp @@ -75,6 +75,7 @@ static std::ostream& operator<<(std::ostream& stream, const nvinfer1::DataType& case nvinfer1::DataType::kUINT8: return stream << "uint8"; case nvinfer1::DataType::kINT32: return stream << "int32"; case nvinfer1::DataType::kBOOL: return stream << "bool"; + case nvinfer1::DataType::kFP8: return stream << "float8"; default: throw std::runtime_error("Unknown dtype"); } } @@ -270,12 +271,16 @@ nvinfer1::ITensor* greaterLessOrEqual(IImporterContext* ctx, const ::ONNX_NAMESP // Helper function to determine if a shape contains dynamic dimensions bool isDynamic(const nvinfer1::Dims& shape); +// Helper function to use optimized 3D instanceNorm plugin +NodeImportResult instanceNormPluginHelper( + IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs); + // Helper fucntion to create an iota fill given a set of dimensions and an axis nvinfer1::ITensor* iota(IImporterContext* ctx, ShapeTensor iotaDims, int32_t axis); // Helper function to load a creator from the registry -nvinfer1::IPluginCreator* importPluginCreator( - const std::string& pluginName, const std::string& pluginVersion, const std::string& pluginNamespace = ""); +nvinfer1::IPluginCreator* importPluginCreator(IImporterContext* ctx, std::string const& pluginName, + std::string const& pluginVersion, std::string const& pluginNamespace = ""); // Helper function to get a plugin from the PluginRegistry std::unique_ptr createPlugin(const std::string& name, @@ -291,6 +296,10 @@ NodeImportResult lstmLegacyImporter( // Helper function to create and fill a Dims object with defined values nvinfer1::Dims makeDims(int nbDims, int val); +// Helper function to create normalization layers for GroupNorm and InstanceNorm +NodeImportResult normalizationHelper( + IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs); + // Helper function to parse activation values for LSTM nodes std::vector parseLSTMActivationValues(const std::vector& activationTypes, const std::vector& activationValues, bool isAlpha); @@ -455,4 +464,5 @@ float* convertFP16Data(void* weightValues, nvinfer1::Dims shape, IImporterContex // Helper function to validate input types for an ONNX node Status notInvalidType(TensorOrWeights const& input, std::vector const& invalidTypes); +Status processMetadata(::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::ILayer* layer); } // namespace onnx2trt diff --git a/trt_utils.hpp b/trt_utils.hpp index 71cd8f78..fe4af868 100644 --- a/trt_utils.hpp +++ b/trt_utils.hpp @@ -22,7 +22,8 @@ inline int getDtypeSize(nvinfer1::DataType trtDtype) { case nvinfer1::DataType::kFLOAT: return 4; case nvinfer1::DataType::kUINT8: - case nvinfer1::DataType::kINT8: return 1; + case nvinfer1::DataType::kINT8: + 
case nvinfer1::DataType::kFP8: return 1;
     case nvinfer1::DataType::kHALF: return 2;
     case nvinfer1::DataType::kINT32: return 4;
@@ -156,9 +157,9 @@ inline ::ONNX_NAMESPACE::TensorProto_DataType trtDataTypeToONNX(nvinfer1::DataTy
     case nvinfer1::DataType::kINT8: return ::ONNX_NAMESPACE::TensorProto::INT8;
     case nvinfer1::DataType::kBOOL: return ::ONNX_NAMESPACE::TensorProto::BOOL;
     case nvinfer1::DataType::kUINT8: return ::ONNX_NAMESPACE::TensorProto::UINT8;
-    default: return ::ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;
+    case nvinfer1::DataType::kFP8: break;
     }
-    throw std::runtime_error{"Unreachable"};
+    return ::ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;
 }
 } // namespace onnx2trt