diff --git a/.gitmodules b/.gitmodules
index f3fce904..4feacda6 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,4 +1,4 @@
 [submodule "third_party/onnx"]
 	path = third_party/onnx
 	url = https://github.com/onnx/onnx.git
-	branch = rel-1.12.0
+	branch = v1.13.1
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 21abe1c9..0d97c405 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,7 +29,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
 #--------------------------------------------------
 set(ONNX2TRT_MAJOR 8)
 set(ONNX2TRT_MINOR 6)
-set(ONNX2TRT_PATCH 0)
+set(ONNX2TRT_PATCH 1)
 set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")
 #--------------------------------------------------
diff --git a/NvOnnxParser.h b/NvOnnxParser.h
index fcc93ce8..24d368ca 100644
--- a/NvOnnxParser.h
+++ b/NvOnnxParser.h
@@ -80,7 +80,7 @@ constexpr inline int32_t EnumMax()
 //!
 //! \brief Represents one or more OnnxParserFlag values using binary OR
-//! operations, e.g., 1U << OnnxParserFlag::kVERSION_COMPATIBLE
+//! operations, e.g., 1U << OnnxParserFlag::kNATIVE_INSTANCENORM
 //!
 //! \see IParser::setFlags() and IParser::getFlags()
 //!
@@ -249,6 +249,26 @@ class IParser
     //!
     virtual void clearErrors() = 0;
 
+    virtual ~IParser() noexcept = default;
+
+    //!
+    //! \brief Query the plugin libraries needed to implement operations used by the parser in a version-compatible
+    //! engine.
+    //!
+    //! This provides a list of plugin libraries on the filesystem needed to implement operations
+    //! in the parsed network. If you are building a version-compatible engine using this network,
+    //! provide this list to IBuilderConfig::setPluginsToSerialize to serialize these plugins along
+    //! with the version-compatible engine, or, if you want to ship these plugin libraries externally
+    //! to the engine, ensure that IPluginRegistry::loadLibrary is used to load these libraries in the
+    //! appropriate runtime before deserializing the corresponding engine.
+    //!
+    //! \param[out] nbPluginLibs Returns the number of plugin libraries in the array, or -1 if there was an error.
+    //! \return Array of `nbPluginLibs` C-strings describing plugin library paths on the filesystem if nbPluginLibs > 0,
+    //! or nullptr otherwise. This array is owned by the IParser, and the pointers in the array are only valid until
+    //! the next call to parse(), supportsModel(), parseFromFile(), or parseWithWeightDescriptors().
+    //!
+    virtual char const* const* getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept = 0;
+
     //!
     //! \brief Set the parser flags.
     //!
@@ -297,26 +317,6 @@ class IParser
     //! \return True if flag is set, false if unset.
     //!
     virtual bool getFlag(OnnxParserFlag onnxParserFlag) const noexcept = 0;
-
-    virtual ~IParser() noexcept = default;
-
-    //!
-    //! \brief Query the plugin libraries needed to implement operations used by the parser in a version-compatible
-    //! engine.
-    //!
-    //! This provides a list of plugin libraries on the filesystem needed to implement operations
-    //! in the parsed network. If you are building a version-compatible engine using this network,
-    //! provide this list to IBuilderConfig::setPluginsToSerialize to serialize these plugins along
-    //! with the version-compatible engine, or, if you want to ship these plugin libraries externally
-    //! to the engine, ensure that IPluginRegistry::loadLibrary is used to load these libraries in the
-    //! appropriate runtime before deserializing the corresponding engine.
-    //!
-    //! \param[out] nbPluginLibs Returns the number of plugin libraries in the array, or -1 if there was an error.
-    //! \return Array of `nbPluginLibs` C-strings describing plugin library paths on the filesystem if nbPluginLibs > 0,
-    //! or nullptr otherwise. This array is owned by the IParser, and the pointers in the array are only valid until
-    //! the next call to parse(), supportsModel(), parseFromFile(), or parseWithWeightDescriptors().
-    //!
-    virtual char const* const* getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept = 0;
 };
 
 } // namespace nvonnxparser
diff --git a/README.md b/README.md
index 952b789e..81364a9e 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.
 
 ## Supported TensorRT Versions
 
-Development on the `main` branch is for the latest version of [TensorRT 8.6.0](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on the `main` branch is for the latest version of [TensorRT 8.6](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
 
 For previous versions of TensorRT, refer to their respective branches.
 
@@ -48,8 +48,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
 ### Dependencies
 
  - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
- - [TensorRT 8.6.0](https://developer.nvidia.com/tensorrt)
- - [TensorRT 8.6.0 open source libaries (main branch)](https://github.com/NVIDIA/TensorRT/)
+ - [TensorRT 8.6](https://developer.nvidia.com/tensorrt)
+ - [TensorRT 8.6 open source libraries (main branch)](https://github.com/NVIDIA/TensorRT/)
 
 ### Building
 
@@ -65,10 +65,19 @@ Once you have cloned the repository, you can build the parser libraries and exec
 
 Note that this project has a dependency on CUDA. By default the build will look in `/usr/local/cuda` for the CUDA toolkit installation. If your CUDA path is different, overwrite the default path by providing `-DCUDA_TOOLKIT_ROOT_DIR=` in the CMake command.
 
-### Experimental Ops
-All experimental operators will be considered unsupported by the ONNX-TRT's `supportsModel()` function.
+### InstanceNormalization Performance
 
-`NonMaxSuppression` is available as an experimental operator in TensorRT 8. It has the limitation that the output shape is always padded to length [`max_output_boxes_per_class`, 3], therefore some post processing is required to extract the valid indices.
+In TensorRT 8.6 there are two implementations of InstanceNormalization that may perform differently depending on various parameters. By default the parser will insert an InstanceNormalization plugin layer, as it performs best for general use cases. Users that want to benchmark using the native TensorRT implementation of InstanceNormalization can set the parser flag `kNATIVE_INSTANCENORM` prior to parsing the model. For building version-compatible or hardware-compatible engines, this flag must be set.
+
+C++ Example:
+
+    auto flag = 1U << static_cast<uint32_t>(nvonnxparser::OnnxParserFlag::kNATIVE_INSTANCENORM);
+    parser->setFlags(flag);
+
+Python Example:
+
+    flag = 1 << (int)(trt.OnnxParserFlag.NATIVE_INSTANCENORM)
+    parser.flags = flag
 
 ## Executable Usage
 
@@ -92,9 +101,9 @@ Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl`
 
     python3 -m pip install /python/tensorrt-8.x.x.x-cp-none-linux_x86_64.whl
 
-TensorRT 8.6.0 supports ONNX release 1.12.0. Install it with:
+TensorRT 8.6 supports ONNX release 1.13.1. Install it with:
 
-    python3 -m pip install onnx==1.12.0
+    python3 -m pip install onnx==1.13.1
 
 The ONNX-TensorRT backend can be installed by running:
 
diff --git a/builtin_op_importers.cpp b/builtin_op_importers.cpp
index abc60fd3..52cb2480 100644
--- a/builtin_op_importers.cpp
+++ b/builtin_op_importers.cpp
@@ -2342,7 +2342,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop)
     constexpr int32_t NB_NON_STATE_INPUTS = 2; // First 2 inputs are trip count and condition respectively.
     constexpr int32_t NB_DISCARDED_OUTPUTS
         = 1; // First output is the updated value of the condition, and is ignored by the outer loop node.
-    constexpr int32_t DUMMY_SCAN_OUTPUT_LENGTH = 1;
+    constexpr int32_t DUMMY_SCAN_OUTPUT_LENGTH = 1024;
     ASSERT((inputs.size() >= 2) && "The Loop operator requires at least 2 inputs.", ErrorCode::kINVALID_NODE);
     OnnxAttrs attrs(node, ctx);
     int32_t const nbInputs = node.input().size();
@@ -4947,13 +4947,13 @@ DEFINE_BUILTIN_OP_IMPORTER(Trilu)
     using eOp = nvinfer1::ElementWiseOperation;
     auto* data = &convertToTensor(inputs.at(0), ctx);
    auto const nbDims = data->getDimensions().nbDims;
-    ASSERT((nbDims == 2 || nbDims == 3) && "Trilu input must have 2 or 3 input dimensions!", ErrorCode::kINVALID_NODE);
+    ASSERT((nbDims >= 2) && "Trilu input must have at least 2 dimensions!", ErrorCode::kINVALID_NODE);
     OnnxAttrs attrs(node, ctx);
     int32_t const upper = attrs.get("upper", 0);
 
     // Input may be in a batch so we need to get NxM dimensions
-    int64_t const N = nbDims == 2 ? 0 : 1;
-    int64_t const M = nbDims == 2 ? 1 : 2;
+    int64_t const N = nbDims - 2;
+    int64_t const M = nbDims - 1;
 
     // Create iota dims of NxM
     const ShapeTensor iotadims
@@ -4975,11 +4975,13 @@ DEFINE_BUILTIN_OP_IMPORTER(Trilu)
         cols = &elementwiseHelper(ctx, node, {cols, k}, eOp::kSUB).value().at(0).tensor();
     }
 
-    // Unsqueeze to broadcast rows/cols to 3D if necessary during next elementwise operation
-    if (nbDims == 3)
+    // Unsqueeze to broadcast rows/cols if necessary during next elementwise operation.
+    if (nbDims > 2)
     {
-        rows = unsqueezeTensor(ctx, node, *rows, {0});
-        cols = unsqueezeTensor(ctx, node, *cols, {0});
+        std::vector<int32_t> batchDims(nbDims - 2);
+        std::iota(batchDims.begin(), batchDims.end(), 0);
+        rows = unsqueezeTensor(ctx, node, *rows, batchDims);
+        cols = unsqueezeTensor(ctx, node, *cols, batchDims);
     }
 
     // For lower Trilus, use greaterOrEquals. For upper Trilus, use lessOrEquals
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 468ae969..4d9052b4 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -2,6 +2,14 @@
 
 # ONNX-TensorRT Changelog
 
+# TensorRT 8.6 GA Release - 2023-5-1
+For more details, see the 8.6 GA release notes for the fixes since 8.6 EA.
+
+- Renamed `kVERSION_COMPATIBLE` flag to `kNATIVE_INSTANCENORM`
+- Added support for N-D `Trilu`
+- Removed old LSTM importer
+- Updated ONNX submodule to v1.13.1.
+
 # TensorRT 8.6 EA Release - 2023-3-13
 
 ## Added
diff --git a/onnx2trt_utils.cpp b/onnx2trt_utils.cpp
index b7d19a34..b407000e 100644
--- a/onnx2trt_utils.cpp
+++ b/onnx2trt_utils.cpp
@@ -1333,278 +1333,6 @@ bool isTransposeRequired(nvinfer1::Dims const& shape, nvinfer1::Permutation cons
     return false;
 }
 
-NodeImportResult lstmLegacyImporter(
-    IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector<TensorOrWeights>& inputs)
-{
-    // Input
-    nvinfer1::ITensor& raw_input = convertToTensor(inputs.at(0), ctx);
-    ASSERT(3 == raw_input.getDimensions().nbDims && "Input tensor must be 3 dimensional", ErrorCode::kINVALID_NODE);
-    ASSERT((raw_input.getType() == nvinfer1::DataType::kFLOAT || raw_input.getType() == nvinfer1::DataType::kHALF)
-            && "Only fp16 and fp32 inputs are supported",
-        ErrorCode::kUNSUPPORTED_NODE);
-    const nvinfer1::DataType input_type = raw_input.getType();
-    const int32_t max_seq_len = raw_input.getDimensions().d[0];
-    const int32_t batch_size = raw_input.getDimensions().d[1];
-
-    // Attributes
-    OnnxAttrs attrs(node, ctx);
-    const std::string direction_str = attrs.get("direction", "forward");
-    ASSERT((direction_str == "forward" || direction_str == "bidirectional") && "Reverse LSTM unsupported",
-        ErrorCode::kUNSUPPORTED_NODE);
-    const nvinfer1::RNNDirection direction
-        = (direction_str == "forward") ? nvinfer1::RNNDirection::kUNIDIRECTION : nvinfer1::RNNDirection::kBIDIRECTION;
-    const int num_directions = (direction_str == "forward") ? 1 : 2;
-    // There are three distinct uses of an activation function within the LSTM equations
-    // One for the input/forget/output gates, one for the cell state, and one for the output
-    // RNNv2 only supports the default choice for each, listed here (duplicated for bidirectional)
-    std::vector<std::string> default_activations = {"Sigmoid", "Tanh", "Tanh"};
-    if (num_directions == 2)
-    {
-        default_activations.insert(default_activations.end(), {"Sigmoid", "Tanh", "Tanh"});
-    }
-    const std::vector<std::string> activations
-        = attrs.get<std::vector<std::string>>("activations", default_activations);
-    ASSERT(activations == default_activations && "Nonstandard activations within LSTM unsupported",
-        ErrorCode::kUNSUPPORTED_NODE);
-    const float clip = attrs.get("clip", 0.0f);
-    ASSERT(clip == 0.0f && "Clipping unsupported", ErrorCode::kUNSUPPORTED_NODE);
-    const int32_t hidden_size = attrs.get<int32_t>("hidden_size");
-    ASSERT(hidden_size > 0, ErrorCode::kINVALID_NODE);
-    const int32_t input_forget = attrs.get("input_forget", 0);
-    ASSERT(0 == input_forget && "Coupled input/forget unsupported", ErrorCode::kUNSUPPORTED_NODE);
-
-    // Optional Inputs
-    bool has_bias = false;
-    nvinfer1::ITensor* sequence_lens = nullptr;
-    nvinfer1::ITensor* initial_h = nullptr;
-    nvinfer1::ITensor* initial_c = nullptr;
-    for (int i = 3; i < node.input_size(); i++)
-    {
-        const std::string& input_name = node.input(i);
-        if (input_name == "B")
-        {
-            has_bias = true;
-        }
-        else if (input_name == "sequence_lens")
-        {
-            sequence_lens = &(convertToTensor(inputs.at(i), ctx));
-            ASSERT(sequence_lens && sequence_lens->getType() == nvinfer1::DataType::kINT32
-                    && "Failed to process sequence_lens (sequence_lens must be int32)",
-                ErrorCode::kINVALID_NODE);
-        }
-        else if (input_name == "initial_h" || input_name == "initial_c")
-        {
-            nvinfer1::ITensor* output = nullptr;
-            if (inputs.at(i).is_weights())
-            {
-                /* constant->shuffle bug (NVBug 2650549), so we do the transpose manually */
-                ShapedWeights weights = inputs.at(i).weights();
-                const int dtype_size = getDtypeSize(weights.type);
-                const size_t len = num_directions * batch_size * hidden_size * dtype_size;
-                auto* source = reinterpret_cast(weights.values);
-                std::vector buffer;
-                buffer.resize(len);
-                for (int d = 0; d < num_directions; d++)
-                {
-                    for (int j = 0; j < batch_size; j++)
-                    {
-                        for (int k = 0; k < hidden_size; k++)
-                        {
-                            for (int b = 0; b < dtype_size; b++)
-                            {
-                                int src_idx = d * batch_size * hidden_size * dtype_size + j * hidden_size * dtype_size
-                                    + k * dtype_size + b;
-                                int buf_idx = j * num_directions * hidden_size * dtype_size
-                                    + d * hidden_size * dtype_size + k * dtype_size + b;
-                                buffer.at(buf_idx) = source[src_idx];
-                            }
-                        }
-                    }
-                }
-                std::memcpy(weights.values, static_cast(buffer.data()), len);
-                const nvinfer1::Dims new_dims = {3, {batch_size, num_directions, hidden_size}};
-                output = ctx->network()->addConstant(new_dims, weights)->getOutput(0);
-                ASSERT(output && "Failed to convert initial_h or initial_c weights to constant layer",
-                    ErrorCode::kINTERNAL_ERROR);
-            }
-            else
-            {
-                /* TODO: Once NVBug 2650549 is fixed, we can use just this path instead */
-                /* nvinfer1::ITensor& source = convertToTensor(inputs.at(i), ctx); */
-                nvinfer1::ITensor& source = inputs.at(i).tensor();
-                auto* shuffle_layer = ctx->network()->addShuffle(source);
-                ASSERT(shuffle_layer && "Failed to create initial_h shuffle layer", ErrorCode::kINTERNAL_ERROR);
-                shuffle_layer->setFirstTranspose(nvinfer1::Permutation{1, 0, 2});
-                output = shuffle_layer->getOutput(0);
-            }
-            ASSERT(output->getType() == input_type && "initial_h and initial_c datatype must match input",
-                ErrorCode::kINVALID_NODE);
-            if (input_name == "initial_h")
-            {
-                initial_h = output;
-            }
-            else
-            {
-                initial_c = output;
-            }
-        }
-        else if (input_name == "P")
-        {
-            ASSERT(false && "Peephole connections not supported", ErrorCode::kUNSUPPORTED_NODE);
-        }
-    }
-
-    // Input Shuffle Layer
-    auto* input_shuffle_layer = ctx->network()->addShuffle(raw_input);
-    ASSERT(input_shuffle_layer && "Failed to create input shuffle layer", ErrorCode::kINTERNAL_ERROR);
-    input_shuffle_layer->setFirstTranspose(nvinfer1::Permutation{1, 0, 2});
-
-    // RNNv2 Layer
-    nvinfer1::ITensor& input_seqs = *(input_shuffle_layer->getOutput(0));
-    const nvinfer1::RNNOperation op = nvinfer1::RNNOperation::kLSTM;
-    const int32_t layer_count = 1;
-    auto* layer = ctx->network()->addRNNv2(input_seqs, layer_count, hidden_size, max_seq_len, op);
-    ASSERT(layer && "Failed to create RNNv2 layer", ErrorCode::kINTERNAL_ERROR);
-    layer->setInputMode(nvinfer1::RNNInputMode::kLINEAR);
-    layer->setDirection(direction);
-    if (sequence_lens)
-    {
-        layer->setSequenceLengths(*sequence_lens);
-    }
-    if (initial_h)
-    {
-        layer->setHiddenState(*initial_h);
-    }
-    if (initial_c)
-    {
-        layer->setCellState(*initial_c);
-    }
-
-    // Weights
-    ASSERT(inputs.at(1).is_weights() && "W must be constant", ErrorCode::kUNSUPPORTED_NODE);
-    ASSERT(inputs.at(2).is_weights() && "R must be constant", ErrorCode::kUNSUPPORTED_NODE);
-    ShapedWeights gate_weights = inputs.at(1).weights();
-    ShapedWeights rcur_weights = inputs.at(2).weights();
-
-    nvinfer1::DataType gate_weights_type, rcur_weights_type;
-    ASSERT(convertDtype(gate_weights.type, &gate_weights_type) && "Bad datatype in W", ErrorCode::kINTERNAL_ERROR);
-    ASSERT(convertDtype(rcur_weights.type, &rcur_weights_type) && "Bad datatype in R", ErrorCode::kINTERNAL_ERROR);
-    ASSERT(input_type == gate_weights_type && "W datatype must match X", ErrorCode::kINVALID_NODE);
-    ASSERT(input_type == rcur_weights_type && "R datatype must match X", ErrorCode::kINVALID_NODE);
-
-    ShapedWeights bias_weights;
-    if (has_bias)
-    {
-        ASSERT(inputs.at(3).is_weights() && "B must be constant", ErrorCode::kUNSUPPORTED_NODE);
-        bias_weights = inputs.at(3).weights();
-        nvinfer1::DataType bias_weights_type;
-        ASSERT(convertDtype(bias_weights.type, &bias_weights_type) && "Bad datatype in B", ErrorCode::kINTERNAL_ERROR);
-        ASSERT(input_type == bias_weights_type && "B datatype must match X", ErrorCode::kINVALID_NODE);
-    }
-
-    const int data_size = (input_type == nvinfer1::DataType::kFLOAT) ? 4 : 2;
-    const int input_size = gate_weights.shape.d[2];
-
-    auto weightBuilder
-        = [input_type, data_size, hidden_size, ctx](int layer_index, ShapedWeights& src, int stride, int idx) {
-              nvinfer1::Weights w;
-              int direction_offset = data_size * layer_index * 4 * hidden_size * stride;
-              int gate_offset = data_size * hidden_size * stride * idx;
-              w.type = input_type;
-              w.values = reinterpret_cast(
-                  reinterpret_cast(src.values) + direction_offset + gate_offset);
-              w.count = hidden_size * stride;
-              return w;
-          };
-
-    // RNNv2 requires that a bias be set, even if none is provided
-    auto zeroes = ctx->createTempWeights(gate_weights.type, nvinfer1::Dims{1, {hidden_size}});
-    std::memset(zeroes.values, 0, data_size * hidden_size);
-
-    auto biasBuilder
-        = [input_type, data_size, hidden_size, has_bias, zeroes](int layer_index, ShapedWeights& src, int idx) {
-              nvinfer1::Weights b;
-              int direction_offset = data_size * layer_index * 8 * hidden_size;
-              int gate_offset = data_size * hidden_size * idx;
-              b.type = input_type;
-              if (has_bias)
-              {
-                  b.values = reinterpret_cast(
-                      reinterpret_cast(src.values) + direction_offset + gate_offset);
-              }
-              else
-              {
-                  b.values = zeroes.values;
-              }
-              b.count = hidden_size;
-              return b;
-          };
-
-    for (int layer_index = 0; layer_index < num_directions; layer_index++)
-    {
-        nvinfer1::Weights W_i = weightBuilder(layer_index, gate_weights, input_size, 0);
-        nvinfer1::Weights W_o = weightBuilder(layer_index, gate_weights, input_size, 1);
-        nvinfer1::Weights W_f = weightBuilder(layer_index, gate_weights, input_size, 2);
-        nvinfer1::Weights W_c = weightBuilder(layer_index, gate_weights, input_size, 3);
-        nvinfer1::Weights R_i = weightBuilder(layer_index, rcur_weights, hidden_size, 0);
-        nvinfer1::Weights R_o = weightBuilder(layer_index, rcur_weights, hidden_size, 1);
-        nvinfer1::Weights R_f = weightBuilder(layer_index, rcur_weights, hidden_size, 2);
-        nvinfer1::Weights R_c = weightBuilder(layer_index, rcur_weights, hidden_size, 3);
-
-        bool isW = true;
-        layer->setWeightsForGate(layer_index, nvinfer1::RNNGateType::kINPUT, isW, W_i);
-        layer->setWeightsForGate(layer_index, nvinfer1::RNNGateType::kOUTPUT, isW, W_o);
-        layer->setWeightsForGate(layer_index, nvinfer1::RNNGateType::kFORGET, isW, W_f);
-        layer->setWeightsForGate(layer_index, nvinfer1::RNNGateType::kCELL, isW, W_c);
-        isW = false;
-        layer->setWeightsForGate(layer_index, nvinfer1::RNNGateType::kINPUT, isW, R_i);
-        layer->setWeightsForGate(layer_index, nvinfer1::RNNGateType::kOUTPUT, isW, R_o);
-        layer->setWeightsForGate(layer_index, nvinfer1::RNNGateType::kFORGET, isW, R_f);
-        layer->setWeightsForGate(layer_index, nvinfer1::RNNGateType::kCELL, isW, R_c);
-
-        nvinfer1::Weights B_wi = biasBuilder(layer_index, bias_weights, 0);
-        nvinfer1::Weights B_wo = biasBuilder(layer_index, bias_weights, 1);
-        nvinfer1::Weights B_wf = biasBuilder(layer_index, bias_weights, 2);
-        nvinfer1::Weights B_wc = biasBuilder(layer_index, bias_weights, 3);
-        nvinfer1::Weights B_ri = biasBuilder(layer_index, bias_weights, 4);
-        nvinfer1::Weights B_ro = biasBuilder(layer_index, bias_weights, 5);
-        nvinfer1::Weights B_rf = biasBuilder(layer_index, bias_weights, 6);
-        nvinfer1::Weights B_rc = biasBuilder(layer_index, bias_weights, 7);
-
-        isW = true;
-        layer->setBiasForGate(layer_index, nvinfer1::RNNGateType::kINPUT, isW, B_wi);
-        layer->setBiasForGate(layer_index, nvinfer1::RNNGateType::kOUTPUT, isW, B_wo);
-        layer->setBiasForGate(layer_index, nvinfer1::RNNGateType::kFORGET, isW, B_wf);
-        layer->setBiasForGate(layer_index, nvinfer1::RNNGateType::kCELL, isW, B_wc);
-        isW = false;
-        layer->setBiasForGate(layer_index, nvinfer1::RNNGateType::kINPUT, isW, B_ri);
-        layer->setBiasForGate(layer_index, nvinfer1::RNNGateType::kOUTPUT, isW, B_ro);
-        layer->setBiasForGate(layer_index, nvinfer1::RNNGateType::kFORGET, isW, B_rf);
-        layer->setBiasForGate(layer_index, nvinfer1::RNNGateType::kCELL, isW, B_rc);
-    }
-
-    // Outputs
-    ASSERT((layer->getNbOutputs() == 3) && "3 outputs are required.", ErrorCode::kINTERNAL_ERROR);
-    ASSERT(
-        (node.output_size() <= 3) && "At most 3 outputs are allowed for the LSTM operator.", ErrorCode::kINVALID_NODE);
-    std::vector<TensorOrWeights> outputs;
-    for (int i = 0; i < node.output_size(); i++)
-    {
-        auto* shuffle_layer = ctx->network()->addShuffle(*(layer->getOutput(i)));
-        ASSERT(shuffle_layer && "Failed to create output shuffle layer", ErrorCode::kINTERNAL_ERROR);
-        shuffle_layer->setFirstTranspose(nvinfer1::Permutation{1, 0, 2});
-        if (i == 0)
-        {
-            nvinfer1::Dims Y_dims{4, {max_seq_len, batch_size, num_directions, hidden_size}};
-            shuffle_layer->setReshapeDimensions(Y_dims);
-            shuffle_layer->setZeroIsPlaceholder(false);
-            shuffle_layer->setSecondTranspose(nvinfer1::Permutation{0, 2, 1, 3});
-        }
-        outputs.emplace_back(shuffle_layer->getOutput(0));
-    }
-    return {outputs};
-}
-
 nvinfer1::Dims makeDims(int nbDims, int val)
 {
     // Zero all the dimensions, so that unused dimensions are deterministic even if accidentally used.
@@ -1694,7 +1422,6 @@ bool parseExternalWeights(IImporterContext* ctx, std::string file, std::string p
 #ifdef _MSC_VER
     size_t slash = path.rfind("\\");
     // When using WSL path can have "\" or "/". Need to check both options here.
-    // See bug https://nvbugs/3635640 for an example
     if (slash == std::string::npos)
     {
         slash = path.rfind("/");
diff --git a/onnx2trt_utils.hpp b/onnx2trt_utils.hpp
index 7cafa9e9..700b9ad2 100644
--- a/onnx2trt_utils.hpp
+++ b/onnx2trt_utils.hpp
@@ -289,10 +289,6 @@ std::unique_ptr createPlugin(const std::stri
 // Helper function to determine if a transpose is required
 bool isTransposeRequired(nvinfer1::Dims const& shape, nvinfer1::Permutation const& perm);
 
-// Helper function to import LSTM ops through the legacy CUDNN path
-NodeImportResult lstmLegacyImporter(
-    IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector<TensorOrWeights>& inputs);
-
 // Helper function to create and fill a Dims object with defined values
 nvinfer1::Dims makeDims(int nbDims, int val);
 
diff --git a/onnx_tensorrt/__init__.py b/onnx_tensorrt/__init__.py
index 1218d549..87102a56 100644
--- a/onnx_tensorrt/__init__.py
+++ b/onnx_tensorrt/__init__.py
@@ -4,4 +4,4 @@
 
 from . import backend
 
-__version__ = "8.5.1"
+__version__ = "8.6.1"
diff --git a/third_party/onnx b/third_party/onnx
index f7ee1ac6..ad834eb7 160000
--- a/third_party/onnx
+++ b/third_party/onnx
@@ -1 +1 @@
-Subproject commit f7ee1ac60d06abe8e26c9b6bbe1e3db5286b614b
+Subproject commit ad834eb73ee0cd9b6fa9ea892caeed5fa17d7dc0
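
Taken together, the `kNATIVE_INSTANCENORM` parser flag and the relocated `IParser::getUsedVCPluginLibraries()` API in this patch are intended for version-compatible engine builds. Below is a minimal C++ sketch (not part of the patch) of how they might be wired into a standard TensorRT 8.6 build flow; the model path, builder/network/config setup, and error handling are illustrative assumptions only.

    // Sketch: build a version-compatible engine, serializing the plugin libraries
    // reported by the parser. "model.onnx" is a placeholder path.
    #include <memory>

    #include "NvInfer.h"
    #include "NvOnnxParser.h"

    bool buildVersionCompatibleEngine(nvinfer1::IBuilder& builder, nvinfer1::ILogger& logger)
    {
        auto network = std::unique_ptr<nvinfer1::INetworkDefinition>(builder.createNetworkV2(
            1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));
        auto parser = std::unique_ptr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, logger));

        // Version-compatible (and hardware-compatible) builds require the native InstanceNormalization path.
        parser->setFlags(1U << static_cast<uint32_t>(nvonnxparser::OnnxParserFlag::kNATIVE_INSTANCENORM));

        if (!parser->parseFromFile("model.onnx", static_cast<int32_t>(nvinfer1::ILogger::Severity::kWARNING)))
        {
            return false;
        }

        auto config = std::unique_ptr<nvinfer1::IBuilderConfig>(builder.createBuilderConfig());
        config->setFlag(nvinfer1::BuilderFlag::kVERSION_COMPATIBLE);

        // Serialize any plugin libraries the parsed network depends on alongside the engine,
        // as described in the getUsedVCPluginLibraries() documentation above.
        int64_t nbPluginLibs = 0;
        char const* const* pluginLibs = parser->getUsedVCPluginLibraries(nbPluginLibs);
        if (nbPluginLibs > 0)
        {
            config->setPluginsToSerialize(pluginLibs, static_cast<int32_t>(nbPluginLibs));
        }

        auto serialized = std::unique_ptr<nvinfer1::IHostMemory>(builder.buildSerializedNetwork(*network, *config));
        return serialized != nullptr;
    }

If the plugin libraries are shipped externally instead of being serialized into the engine, the header comment above indicates they must be loaded through IPluginRegistry::loadLibrary in the target runtime before the engine is deserialized.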