Staging for the 10.4-GA release (#995)

Signed-off-by: poweiw <[email protected]>
onnx · Sep 11, 2024 · 3775e49 · 3775e49
1 parent efd73c8
commit 3775e49
Show file tree

Hide file tree

Showing 16 changed files with 455 additions and 353 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
 # Version information
 #--------------------------------------------------
 set(ONNX2TRT_MAJOR 10)
-set(ONNX2TRT_MINOR 3)
+set(ONNX2TRT_MINOR 4)
 set(ONNX2TRT_PATCH 0)
 set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")
 

diff --git a/ImporterContext.cpp b/ImporterContext.cpp
@@ -134,7 +134,10 @@ void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const&
             mConstantLayers.insert({uniqueName, static_cast<nvinfer1::IConstantLayer*>(layer)});
         }
     }
-    if (node != nullptr && layer != nullptr)
+    // Set metadata only if the layer is associated with an ONNX node.
+    // Skip constant layers because constants are represented as initializers in ONNX and should not be associated
+    // with any ONNX node.
+    if (node != nullptr && layer != nullptr && layer->getType() != nvinfer1::LayerType::kCONSTANT)
     {
         processMetadata(this, *node, layer);
     }

diff --git a/ModelImporter.cpp b/ModelImporter.cpp
@@ -439,6 +439,17 @@ Status importLocalFunctions(ImporterContext* ctx, ::ONNX_NAMESPACE::ModelProto c
     return Status::success();
 }
 
+// Internal helper for the ONNX Runtime TensorRT execution provider (ONNXRT-TRT EP): returns true only when op_name exactly matches "NonMaxSuppression", "NonZero", or "RoiAlign", the ops filtered out as DDS (presumably data-dependent shape - confirm) nodes.
+bool isDDSOp(char const* op_name)
+{
+    auto is = [op_name](char const* name) { return std::strcmp(op_name, name) == 0; };
+    if (is("NonMaxSuppression") || is("NonZero") || is("RoiAlign"))
+    {
+        return true;
+    }
+    return false;
+}
+
 std::pair<bool, ModelImporter::SubGraphSupportVector_t> ModelImporter::doSupportsModel(
     void const* serialized_onnx_model, size_t serialized_onnx_model_size, char const* model_path)
 {
@@ -514,9 +525,10 @@ std::pair<bool, ModelImporter::SubGraphSupportVector_t> ModelImporter::doSupport
         //     1. It is not a node that requires DDS
         //     2. It is not directly connected to an unsupported input
         //     3. The importer function did not throw an assertion
+        bool unsupportedDDS = isDDSOp(node.op_type().c_str());
         bool unsupportedInput = (input_node.empty()) ? false : checkForInput(node);
         bool unsuccessfulParse = node_idx == error_node;
-        if (!unsupportedInput && !unsuccessfulParse)
+        if (!unsupportedDDS && !unsupportedInput && !unsuccessfulParse)
         {
             if (newSubGraph)
             {

diff --git a/ModelRefitter.cpp b/ModelRefitter.cpp
diff --git a/ModelRefitter.hpp b/ModelRefitter.hpp
@@ -60,17 +60,17 @@ class ModelRefitter : public nvonnxparser::IParserRefitter
     //! TConvertFunc is a functor for converting ShapedWeights to an array of type T.
     //! It should return a T*.
     template <typename T, typename TConvertFunc>
-    ValueOrStatus<size_t> batchnormWeightRefitter(
+    size_t batchnormWeightRefitter(
         ::ONNX_NAMESPACE::NodeProto const& node, std::vector<ShapedWeights>& inputs, TConvertFunc&& f);
 
-    Status refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& onnx_model);
-    Status refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph);
-    Status refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph);
-    Status refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName);
-    Status refitOnnxBatchNormNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph);
-    Status refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node);
-    Status refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node);
-    Status refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node);
+    void refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& onnx_model);
+    void refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph);
+    void refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph);
+    void refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName);
+    void refitOnnxBatchNormNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph);
+    void refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node);
+    void refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node);
+    void refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node);
 
 public:
     ModelRefitter(nvinfer1::IRefitter* refitter, nvinfer1::ILogger* logger)

diff --git a/OnnxAttrs.cpp b/OnnxAttrs.cpp
@@ -129,9 +129,9 @@ onnx2trt::ShapedWeights OnnxAttrs::get<onnx2trt::ShapedWeights>(std::string cons
     std::string extName = this->at(key)->ref_attr_name();
     bool isExtAttr = isExternalAttribute(extName, mCtx);
 
-    ::ONNX_NAMESPACE::TensorProto const& onnx_weights_tensor = isExtAttr ? mCtx->localFunctionStack().back().second.at(extName)->t() : this->at(key)->t();
+    ::ONNX_NAMESPACE::TensorProto const& onnxTensor = isExtAttr ? mCtx->localFunctionStack().back().second.at(extName)->t() : this->at(key)->t();
     onnx2trt::ShapedWeights weights;
-    bool success = mCtx->getWeightsContext().convertOnnxWeights(onnx_weights_tensor, &weights);
+    bool success = mCtx->getWeightsContext().convertOnnxWeights(onnxTensor, &weights, true);
     if (!success)
     {
         throw std::runtime_error{"Unable to convert ONNX weights"};

diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.
 
 ## Supported TensorRT Versions
 
-Development on the this branch is for the latest version of [TensorRT 10.2](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on this branch is for the latest version of [TensorRT 10.4](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
 
 For previous versions of TensorRT, refer to their respective branches.
 
@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
 ### Dependencies
 
  - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
- - [TensorRT 10.2](https://developer.nvidia.com/tensorrt)
- - [TensorRT 10.2 open source libaries] (https://github.com/NVIDIA/TensorRT/)
+ - [TensorRT 10.4](https://developer.nvidia.com/tensorrt)
+ - [TensorRT 10.4 open source libraries](https://github.com/NVIDIA/TensorRT/)
 
 ### Building
 
@@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options
 
 Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.
 
-TensorRT 10.1 supports ONNX release 1.16.0. Install it with:
+TensorRT 10.4 supports ONNX release 1.16.0. Install it with:
 
     python3 -m pip install onnx==1.16.0
 

diff --git a/ShapeTensor.cpp b/ShapeTensor.cpp
@@ -542,7 +542,7 @@ nvinfer1::ISliceLayer* addSlice(ImporterContext* ctx, nvinfer1::ITensor& data, c
     constexpr int32_t minDim = std::numeric_limits<int32_t>::min();
     constexpr int32_t maxDim = std::numeric_limits<int32_t>::max();
     nvinfer1::ISliceLayer* slice = N_CHECK(ctx->network()->addSlice(data,
-        shapeTensorToDims(starts, "slice start", 0, maxDim), shapeTensorToDims(sizes, "slice size", 0, maxDim),
+        shapeTensorToDims(starts, "slice start", minDim, maxDim), shapeTensorToDims(sizes, "slice size", 0, maxDim),
         shapeTensorToDims(strides, "slide strides", minDim, maxDim)));
     setShapeInputIfDynamic(ctx, slice, 1, starts);
     setShapeInputIfDynamic(ctx, slice, 2, sizes);

diff --git a/Status.hpp b/Status.hpp
@@ -203,6 +203,7 @@ static std::ostream& operator<<(std::ostream& stream, nvinfer1::DataType const&
     case nvinfer1::DataType::kBOOL: return stream << "bool";
     case nvinfer1::DataType::kFP8: return stream << "float8";
     case nvinfer1::DataType::kINT4: return stream << "int4";
+
     default: throw std::runtime_error("Unknown dtype");
     }
 }

diff --git a/docs/Changelog.md b/docs/Changelog.md
@@ -2,6 +2,14 @@
 
 # ONNX-TensorRT Changelog
 
+# TensorRT 10.4 GA Release - 2024-9-5
+For more details, see the 10.4 GA release notes.
+
+- Added support for tensor `axes` for `Pad` operations
+- Added support for `BlackmanWindow`, `HammingWindow`, and `HannWindow` operations
+- Improved error handling in `IParserRefitter`
+- Fixed kernel shape inference in multi-input convolutions
+
 # TensorRT 10.3 GA Release - 2024-8-7
 For more details, see the 10.3 GA release notes.
 
@@ -14,13 +22,14 @@ For more details, see the 10.2 GA release notes.
 - Improved error handling with new macros and classes
 - Minor changes to op importers for `GRU` and `Squeeze`
 
-# TensorRT 10.1 GA Release - 2024-6-17
+# TensorRT 10.1 GA Release - 2024-6-10
 For more details, see the 10.1 GA release notes.
 
 - Added `supportsModelV2` API
 - Added support for `DeformConv` operation
 - Added support for `PluginV3` TensorRT Plugins
 - Marked all IParser and IParserRefitter APIs as `noexcept`
+- Added support for passing shape inputs to custom ops backed by `IPluginV3`-based plugins: list the indices of the inputs to be interpreted as shape inputs in a node attribute named `tensorrt_plugin_shape_input_indices`
 
 # TensorRT 10.0 GA Release - 2024-4-25
 For more details, see the 10.0 GA release notes.

diff --git a/docs/operators.md b/docs/operators.md
@@ -2,7 +2,7 @@
 
 # Supported ONNX Operators
 
-TensorRT 10.0 supports operators in the inclusive range of opset 9 to opset 20. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
+TensorRT 10.4 supports operators in the inclusive range of opset 9 to opset 20. The latest information on ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
 
 TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, INT32, INT64, FP8, INT8, INT4, UINT8, and BOOL
 
@@ -36,7 +36,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
 | BitwiseNot                | N          |
 | BitwiseOr                 | N          |
 | BitwiseXor                | N          |
-| BlackmanWindow            | N          |
+| BlackmanWindow            | Y          |
 | Cast                      | Y          | FP32, FP16, BF16, INT32, INT64, UINT8, BOOL |                                                                                                       |
 | CastLike                  | Y          | FP32, FP16, BF16, INT32, INT64, UINT8, BOOL |                                                                                                       |
 | Ceil                      | Y          | FP32, FP16, BF16 |
@@ -85,8 +85,8 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
 | GridSample                | Y          | FP32, FP16 | Input must be 4D input.
 | GroupNormalization        | Y          | FP32, FP16, BF16 |
 | GRU                       | Y          | FP32, FP16, BF16 | For bidirectional GRUs, activation functions must be the same for both the forward and reverse pass
-| HammingWindow             | N          |
-| HannWindow                | N          |
+| HammingWindow             | Y          |
+| HannWindow                | Y          |
 | HardSigmoid               | Y          | FP32, FP16, BF16 |
 | HardSwish                 | Y          | FP32, FP16, BF16 |
 | Hardmax                   | Y          | FP32, FP16, BF16 | `axis` dimension of input must be a build-time constant
@@ -132,7 +132,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
 | OptionalGetElement        | N          |
 | OptionalHasElement        | N          |
 | Or                        | Y          | BOOL |
-| Pad                       | Y          | FP32, FP16, BF16, INT32, INT64 | `axes` must be an initializer |
+| Pad                       | Y          | FP32, FP16, BF16, INT32, INT64 |
 | ParametricSoftplus        | Y          | FP32, FP16, BF16 |
 | Pow                       | Y          | FP32, FP16, BF16 |
 | PRelu                     | Y          | FP32, FP16, BF16 |
@@ -184,7 +184,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
 | Sin                       | Y          | FP32, FP16, BF16 |
 | Sinh                      | Y          | FP32, FP16, BF16 |
 | Size                      | Y          | FP32, FP16, BF16, INT32, INT64, BOOL |
-| Slice                     | Y          | FP32, FP16, BF16, INT32, INT64, BOOL | 
+| Slice                     | Y          | FP32, FP16, BF16, INT32, INT64, BOOL |
 | Softmax                   | Y          | FP32, FP16, BF16 |
 | SoftmaxCrossEntropyLoss   | N          |
 | Softplus                  | Y          | FP32, FP16, BF16 |