TensorRT 8.6.1 Parser Update (#915)
Signed-off-by: Ilya Sherstyuk <[email protected]>
Co-authored-by: Kevin Chen <[email protected]>
ilyasher and kevinch-nv authored May 4, 2023
1 parent 8af13d1 commit 6ba67d3
Showing 10 changed files with 60 additions and 318 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
@@ -1,4 +1,4 @@
[submodule "third_party/onnx"]
path = third_party/onnx
url = https://github.com/onnx/onnx.git
branch = rel-1.12.0
branch = v1.13.1
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -29,7 +29,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
#--------------------------------------------------
set(ONNX2TRT_MAJOR 8)
set(ONNX2TRT_MINOR 6)
set(ONNX2TRT_PATCH 0)
set(ONNX2TRT_PATCH 1)
set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")

#--------------------------------------------------
42 changes: 21 additions & 21 deletions NvOnnxParser.h
@@ -80,7 +80,7 @@ constexpr inline int32_t EnumMax<ErrorCode>()

//!
//! \brief Represents one or more OnnxParserFlag values using binary OR
//! operations, e.g., 1U << OnnxParserFlag::kVERSION_COMPATIBLE
//! operations, e.g., 1U << OnnxParserFlag::kNATIVE_INSTANCENORM
//!
//! \see IParser::setFlags() and IParser::getFlags()
//!
@@ -249,6 +249,26 @@ class IParser
//!
virtual void clearErrors() = 0;

virtual ~IParser() noexcept = default;

//!
//! \brief Query the plugin libraries needed to implement operations used by the parser in a version-compatible
//! engine.
//!
//! This provides a list of plugin libraries on the filesystem needed to implement operations
//! in the parsed network. If you are building a version-compatible engine using this network,
//! provide this list to IBuilderConfig::setPluginsToSerialize to serialize these plugins along
//! with the version-compatible engine, or, if you want to ship these plugin libraries externally
//! to the engine, ensure that IPluginRegistry::loadLibrary is used to load these libraries in the
//! appropriate runtime before deserializing the corresponding engine.
//!
//! \param[out] nbPluginLibs Returns the number of plugin libraries in the array, or -1 if there was an error.
//! \return Array of `nbPluginLibs` C-strings describing plugin library paths on the filesystem if nbPluginLibs > 0,
//! or nullptr otherwise. This array is owned by the IParser, and the pointers in the array are only valid until
//! the next call to parse(), supportsModel(), parseFromFile(), or parseWithWeightDescriptors().
//!
virtual char const* const* getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept = 0;

//!
//! \brief Set the parser flags.
//!
@@ -297,26 +317,6 @@
//! \return True if flag is set, false if unset.
//!
virtual bool getFlag(OnnxParserFlag onnxParserFlag) const noexcept = 0;

virtual ~IParser() noexcept = default;

//!
//! \brief Query the plugin libraries needed to implement operations used by the parser in a version-compatible
//! engine.
//!
//! This provides a list of plugin libraries on the filesystem needed to implement operations
//! in the parsed network. If you are building a version-compatible engine using this network,
//! provide this list to IBuilderConfig::setPluginsToSerialize to serialize these plugins along
//! with the version-compatible engine, or, if you want to ship these plugin libraries externally
//! to the engine, ensure that IPluginRegistry::loadLibrary is used to load these libraries in the
//! appropriate runtime before deserializing the corresponding engine.
//!
//! \param[out] nbPluginLibs Returns the number of plugin libraries in the array, or -1 if there was an error.
//! \return Array of `nbPluginLibs` C-strings describing plugin library paths on the filesystem if nbPluginLibs > 0,
//! or nullptr otherwise. This array is owned by the IParser, and the pointers in the array are only valid until
//! the next call to parse(), supportsModel(), parseFromFile(), or parseWithWeightDescriptors().
//!
virtual char const* const* getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept = 0;
};

} // namespace nvonnxparser
25 changes: 17 additions & 8 deletions README.md
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.

## Supported TensorRT Versions

Development on the `main` branch is for the latest version of [TensorRT 8.6.0](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
Development on the `main` branch is for the latest version of [TensorRT 8.6](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.

For previous versions of TensorRT, refer to their respective branches.

@@ -48,8 +48,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
### Dependencies

- [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
- [TensorRT 8.6.0](https://developer.nvidia.com/tensorrt)
- [TensorRT 8.6.0 open source libraries (main branch)](https://github.com/NVIDIA/TensorRT/)
- [TensorRT 8.6](https://developer.nvidia.com/tensorrt)
- [TensorRT 8.6 open source libraries (main branch)](https://github.com/NVIDIA/TensorRT/)

### Building

@@ -65,10 +65,19 @@ Once you have cloned the repository, you can build the parser libraries and exec

Note that this project has a dependency on CUDA. By default the build will look in `/usr/local/cuda` for the CUDA toolkit installation. If your CUDA path is different, overwrite the default path by providing `-DCUDA_TOOLKIT_ROOT_DIR=<path_to_cuda_install>` in the CMake command.

### Experimental Ops
All experimental operators will be considered unsupported by the ONNX-TRT's `supportsModel()` function.
### InstanceNormalization Performance

`NonMaxSuppression` is available as an experimental operator in TensorRT 8. It has the limitation that the output shape is always padded to length [`max_output_boxes_per_class`, 3], therefore some post processing is required to extract the valid indices.
TensorRT 8.6 provides two implementations of InstanceNormalization that may perform differently depending on the model's parameters. By default, the parser inserts an InstanceNormalization plugin layer, which performs best for general use cases. Users who want to benchmark the native TensorRT implementation of InstanceNormalization can instead set the parser flag `kNATIVE_INSTANCENORM` before parsing the model. This flag must be set when building version-compatible or hardware-compatible engines.

C++ Example:

    auto flag = 1U << static_cast<uint32_t>(nvonnxparser::OnnxParserFlag::kNATIVE_INSTANCENORM);
    parser->setFlags(flag);

Python Example:

    flag = 1 << (int)(trt.OnnxParserFlag.NATIVE_INSTANCENORM)
    parser.flags = flag
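The `setFlags`/`flags` interfaces take a plain bitmask, so multiple parser flags combine with binary OR. A minimal pure-Python sketch of that bitmask arithmetic (the enum value below is an assumption for illustration only, not the real `trt.OnnxParserFlag` value):

```python
# Illustrative sketch: combine parser-flag enum values into one bitmask.
# NATIVE_INSTANCENORM = 0 is a placeholder; real code should use the
# trt.OnnxParserFlag enum members instead of hard-coded ints.
NATIVE_INSTANCENORM = 0  # assumed enum value for demonstration

def make_parser_flags(*flag_values: int) -> int:
    """OR together (1 << value) for each flag enum value."""
    mask = 0
    for value in flag_values:
        mask |= 1 << value
    return mask

flags = make_parser_flags(NATIVE_INSTANCENORM)
print(flags)  # → 1
```

With the real bindings, the resulting mask would then be assigned the same way as above, e.g. `parser.flags = flags`.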

## Executable Usage

@@ -92,9 +101,9 @@ Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl`

python3 -m pip install <tensorrt_install_dir>/python/tensorrt-8.x.x.x-cp<python_ver>-none-linux_x86_64.whl

TensorRT 8.6.0 supports ONNX release 1.12.0. Install it with:
TensorRT 8.6 supports ONNX release 1.13.1. Install it with:

python3 -m pip install onnx==1.12.0
python3 -m pip install onnx==1.13.1

The ONNX-TensorRT backend can be installed by running:

18 changes: 10 additions & 8 deletions builtin_op_importers.cpp
@@ -2342,7 +2342,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop)
constexpr int32_t NB_NON_STATE_INPUTS = 2; // First 2 inputs are trip count and condition respectively.
constexpr int32_t NB_DISCARDED_OUTPUTS
= 1; // First output is the updated value of the condition, and is ignored by the outer loop node.
constexpr int32_t DUMMY_SCAN_OUTPUT_LENGTH = 1;
constexpr int32_t DUMMY_SCAN_OUTPUT_LENGTH = 1024;
ASSERT((inputs.size() >= 2) && "The Loop operator requires at least 2 inputs.", ErrorCode::kINVALID_NODE);
OnnxAttrs attrs(node, ctx);
int32_t const nbInputs = node.input().size();
@@ -4947,13 +4947,13 @@ DEFINE_BUILTIN_OP_IMPORTER(Trilu)
using eOp = nvinfer1::ElementWiseOperation;
auto* data = &convertToTensor(inputs.at(0), ctx);
auto const nbDims = data->getDimensions().nbDims;
ASSERT((nbDims == 2 || nbDims == 3) && "Trilu input must have 2 or 3 input dimensions!", ErrorCode::kINVALID_NODE);
ASSERT((nbDims >= 2) && "Trilu input must have at least 2 dimensions!", ErrorCode::kINVALID_NODE);
OnnxAttrs attrs(node, ctx);
int32_t const upper = attrs.get("upper", 0);

// Input may be in a batch so we need to get NxM dimensions
int64_t const N = nbDims == 2 ? 0 : 1;
int64_t const M = nbDims == 2 ? 1 : 2;
int64_t const N = nbDims - 2;
int64_t const M = nbDims - 1;

// Create iota dims of NxM
const ShapeTensor iotadims
@@ -4975,11 +4975,13 @@
cols = &elementwiseHelper(ctx, node, {cols, k}, eOp::kSUB).value().at(0).tensor();
}

// Unsqueeze to broadcast rows/cols to 3D if necessary during next elementwise operation
if (nbDims == 3)
// Unsqueeze to broadcast rows/cols if necessary during next elementwise operation.
if (nbDims > 2)
{
rows = unsqueezeTensor(ctx, node, *rows, {0});
cols = unsqueezeTensor(ctx, node, *cols, {0});
std::vector<int32_t> batchDims(nbDims - 2);
std::iota(batchDims.begin(), batchDims.end(), 0);
rows = unsqueezeTensor(ctx, node, *rows, batchDims);
cols = unsqueezeTensor(ctx, node, *cols, batchDims);
}

// For lower Trilus, use greaterOrEquals. For upper Trilus, use lessOrEquals
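The Trilu change shown above replaces hard-coded 2-D/3-D handling with index arithmetic over the last two dimensions, broadcast across any number of leading batch dimensions. A rough NumPy sketch of that masking technique (an illustration of the approach, assuming standard ONNX Trilu semantics, not the importer's actual code):

```python
import numpy as np

def trilu_mask(data: np.ndarray, k: int = 0, upper: bool = False) -> np.ndarray:
    """Boolean triangular mask over the last two dims of data, broadcast
    across all leading batch dimensions (mirrors the N-D Trilu fix)."""
    n, m = data.shape[-2], data.shape[-1]
    rows = np.arange(n).reshape(n, 1)      # row indices as a column vector
    cols = np.arange(m).reshape(1, m) - k  # column indices shifted by diagonal k
    # Lower Trilu keeps elements with row >= (col - k); upper keeps row <= (col - k).
    mask = rows <= cols if upper else rows >= cols
    # Broadcasting aligns the 2-D mask with the batch dims, playing the role
    # of the unsqueeze over batch dims in the importer.
    return np.broadcast_to(mask, data.shape)

x = np.ones((2, 3, 3))
lower = np.where(trilu_mask(x, k=0, upper=False), x, 0)  # per-batch lower triangle
```

Here `np.broadcast_to` stands in for the importer's `unsqueezeTensor` calls over the computed batch dims: the same 2-D comparison result applies to every leading batch index.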
8 changes: 8 additions & 0 deletions docs/Changelog.md
@@ -2,6 +2,14 @@

# ONNX-TensorRT Changelog

# TensorRT 8.6 GA Release - 2023-5-1
For more details, see the 8.6 GA release notes for the fixes since 8.6 EA.

- Renamed `kVERSION_COMPATIBLE` flag to `kNATIVE_INSTANCENORM`
- Added support for N-D `Trilu`
- Removed old LSTM importer
- Updated ONNX submodule to v1.13.1.

# TensorRT 8.6 EA Release - 2023-3-13

## Added
