diff --git a/CMakeLists.txt b/CMakeLists.txt index 95ca994..3955a6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}") # Version information #-------------------------------------------------- set(ONNX2TRT_MAJOR 10) -set(ONNX2TRT_MINOR 3) +set(ONNX2TRT_MINOR 4) set(ONNX2TRT_PATCH 0) set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version") diff --git a/ImporterContext.cpp b/ImporterContext.cpp index 7f74757..fdd3dbd 100644 --- a/ImporterContext.cpp +++ b/ImporterContext.cpp @@ -134,7 +134,10 @@ void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const& mConstantLayers.insert({uniqueName, static_cast(layer)}); } } - if (node != nullptr && layer != nullptr) + // Set metadata only if the layer is associated with an ONNX node. + // Skip constant layers because constants are represented as initializers in ONNX and should not be associated + // with any ONNX node. + if (node != nullptr && layer != nullptr && layer->getType() != nvinfer1::LayerType::kCONSTANT) { processMetadata(this, *node, layer); } diff --git a/ModelImporter.cpp b/ModelImporter.cpp index 48c7a29..39b2505 100644 --- a/ModelImporter.cpp +++ b/ModelImporter.cpp @@ -439,6 +439,17 @@ Status importLocalFunctions(ImporterContext* ctx, ::ONNX_NAMESPACE::ModelProto c return Status::success(); } +// Internal helper function used for ONNXRT-TRT EP to filter out DDS nodes +bool isDDSOp(char const* op_name) +{ + auto is = [op_name](char const* name) { return std::strcmp(op_name, name) == 0; }; + if (is("NonMaxSuppression") || is("NonZero") || is("RoiAlign")) + { + return true; + } + return false; +} + std::pair ModelImporter::doSupportsModel( void const* serialized_onnx_model, size_t serialized_onnx_model_size, char const* model_path) { @@ -514,9 +525,10 @@ std::pair ModelImporter::doSupport // 1. It is not a node that requires DDS // 2. It is not directly connected to an unsupported input // 3. The importer function did not throw an assertion + bool unsupportedDDS = isDDSOp(node.op_type().c_str()); bool unsupportedInput = (input_node.empty()) ? 
false : checkForInput(node); bool unsuccessfulParse = node_idx == error_node; - if (!unsupportedInput && !unsuccessfulParse) + if (!unsupportedDDS && !unsupportedInput && !unsuccessfulParse) { if (newSubGraph) { diff --git a/ModelRefitter.cpp b/ModelRefitter.cpp index 4416569..a053786 100644 --- a/ModelRefitter.cpp +++ b/ModelRefitter.cpp @@ -20,7 +20,7 @@ namespace onnx2trt { namespace { -Status deserializeOnnxModelFile(char const* onnxModelFile, ::ONNX_NAMESPACE::ModelProto& onnx_model) +void deserializeOnnxModelFile(char const* onnxModelFile, ::ONNX_NAMESPACE::ModelProto& onnx_model) { // Define S_ISREG macro for Windows #if !defined(S_ISREG) @@ -28,15 +28,14 @@ Status deserializeOnnxModelFile(char const* onnxModelFile, ::ONNX_NAMESPACE::Mod #endif struct stat sb; - ASSERT(!(stat(onnxModelFile, &sb) == 0 && !S_ISREG(sb.st_mode)) - && "Failed to parse the ONNX model; input is not a regular file.", - ErrorCode::kMODEL_DESERIALIZE_FAILED); + ONNXTRT_CHECK(!(stat(onnxModelFile, &sb) == 0 && !S_ISREG(sb.st_mode)), + MAKE_ERROR( + "Failed to parse the ONNX model; input is not a regular file.", ErrorCode::kMODEL_DESERIALIZE_FAILED)); GOOGLE_PROTOBUF_VERIFY_VERSION; bool const fileLoadSuccess = ParseFromFileAsBinary(&onnx_model, onnxModelFile); - ASSERT(fileLoadSuccess && "Failed to parse the ONNX model!", ErrorCode::kMODEL_DESERIALIZE_FAILED); - return Status::success(); + ONNXTRT_CHECK(fileLoadSuccess, MAKE_ERROR("Failed to parse the ONNX model!", ErrorCode::kMODEL_DESERIALIZE_FAILED)); } } // anonymous namespace @@ -49,7 +48,7 @@ std::unordered_set ModelRefitter::getRefittableWeights() } template -ValueOrStatus ModelRefitter::batchnormWeightRefitter( +size_t ModelRefitter::batchnormWeightRefitter( ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs, TConvertFunc&& f) { auto const& scale = inputs.at(0); @@ -88,7 +87,8 @@ ValueOrStatus ModelRefitter::batchnormWeightRefitter( // Validate that all the weights have the same amount of values bool allSame = scale.count() == bias.count() && mean.count() == scale.count() && variance.count() == scale.count() && combinedScale.count() == scale.count() && combinedBias.count() == scale.count(); - ASSERT(allSame && "Inputs to BatchNormalization must have the same shape!", ErrorCode::kREFIT_FAILED); + ONNXTRT_CHECK( + allSame, MAKE_ERROR("Inputs to BatchNormalization must have the same shape!", ErrorCode::kREFIT_FAILED)); for (int32_t i = 0; i < nbChannels; ++i) { @@ -99,16 +99,15 @@ ValueOrStatus ModelRefitter::batchnormWeightRefitter( if (refittableWeights.count(combinedScale.name)) { refittableWeights.erase(combinedScale.name); - ASSERT( - mRefitter->setNamedWeights(combinedScale.name, std::move(combinedScale)) && "Failed to set named weights", - ErrorCode::kREFIT_FAILED); + ONNXTRT_CHECK(mRefitter->setNamedWeights(combinedScale.name, std::move(combinedScale)), + MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); ++successfullyRefittedWeights; } if (refittableWeights.count(combinedBias.name)) { refittableWeights.erase(combinedBias.name); - ASSERT(mRefitter->setNamedWeights(combinedBias.name, std::move(combinedBias)) && "Failed to set named weights", - ErrorCode::kREFIT_FAILED); + ONNXTRT_CHECK(mRefitter->setNamedWeights(combinedBias.name, std::move(combinedBias)), + MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); ++successfullyRefittedWeights; } return successfullyRefittedWeights; @@ -125,18 +124,17 @@ class QuickCast }; }; -Status ModelRefitter::refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& 
onnx_model) +void ModelRefitter::refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& onnx_model) { nestedDepth = 0; successfullyRefittedWeights = 0; size_t const numberOfWeightsToRefit = refittableWeights.size(); - CHECK_STATUS(refitOnnxGraph(onnx_model.graph())); - ASSERT(successfullyRefittedWeights == numberOfWeightsToRefit && "Failed to refit all the weights.", - ErrorCode::kREFIT_FAILED); - return Status::success(); + refitOnnxGraph(onnx_model.graph()); + ONNXTRT_CHECK(successfullyRefittedWeights == numberOfWeightsToRefit, + MAKE_ERROR("Failed to refit all the weights.", ErrorCode::kREFIT_FAILED)); } -Status ModelRefitter::refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph) +void ModelRefitter::refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph) { for (::ONNX_NAMESPACE::TensorProto const& initializer : graph.initializer()) { @@ -159,66 +157,62 @@ Status ModelRefitter::refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph) refittedWeights.insert(initializer.name()); } ShapedWeights weights; - ASSERT(mWeightsContext.convertOnnxWeights(initializer, &weights, /*ownAllWeights=*/true) - && "Failed to import initializer.", - ErrorCode::kUNSUPPORTED_NODE); - ASSERT( - mRefitter->setNamedWeights(initializer.name().c_str(), std::move(weights)) && "Failed to set named weights", - ErrorCode::kREFIT_FAILED); + ONNXTRT_CHECK(mWeightsContext.convertOnnxWeights(initializer, &weights, /*ownAllWeights=*/true), + MAKE_ERROR("Failed to import initializer.", ErrorCode::kUNSUPPORTED_NODE)); + ONNXTRT_CHECK(mRefitter->setNamedWeights(initializer.name().c_str(), std::move(weights)), + MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); ++successfullyRefittedWeights; } std::vector topoOrder; - ASSERT(toposort(graph.node(), &topoOrder) && "Failed to sort the model topologically.", ErrorCode::kINVALID_GRAPH); + ONNXTRT_CHECK(toposort(graph.node(), &topoOrder), + MAKE_ERROR("Failed to sort the model topologically.", ErrorCode::kINVALID_GRAPH)); for (auto const& nodeIdx : topoOrder) { ::ONNX_NAMESPACE::NodeProto const& node = graph.node(nodeIdx); - CHECK_STATUS(refitOnnxNode(node, graph)); + refitOnnxNode(node, graph); } - return Status::success(); } -Status ModelRefitter::refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph) +void ModelRefitter::refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph) { // For nodes that contain subgraphs (Ifs, Loops, Scans), // ensure that the recursion depth is limited to a set amount. 
++nestedDepth; static size_t const MAX_NESTED_SUBGRAPHS = 24; - ASSERT((nestedDepth <= MAX_NESTED_SUBGRAPHS) - && "ONNX graph contains nested structures that exceed the maximum allowed by TensorRT!", - ErrorCode::kUNSUPPORTED_GRAPH); + ONNXTRT_CHECK((nestedDepth <= MAX_NESTED_SUBGRAPHS), + MAKE_ERROR("ONNX graph contains nested structures that exceed the maximum allowed by TensorRT!", + ErrorCode::kUNSUPPORTED_GRAPH)); - Status status{ErrorCode::kSUCCESS}; if (node.op_type() == "Constant") { - status = refitOnnxConstantNode(node, graph.name()); + refitOnnxConstantNode(node, graph.name()); } else if (node.op_type() == "BatchNormalization") { - status = refitOnnxBatchNormNode(node, graph); + refitOnnxBatchNormNode(node, graph); } else if (node.op_type() == "If") { - status = refitOnnxIfNode(node); + refitOnnxIfNode(node); } else if (node.op_type() == "Loop") { - status = refitOnnxLoopNode(node); + refitOnnxLoopNode(node); } else if (node.op_type() == "Scan") { - status = refitOnnxScanNode(node); + refitOnnxScanNode(node); } --nestedDepth; - return status; } -Status ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName) +void ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName) { if (!refittableWeights.count(node.output(0))) { - return Status::success(); + return; } refittableWeights.erase(node.output(0)); if (refittedWeights.count(node.output(0))) @@ -237,7 +231,8 @@ Status ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& n { weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {}}); float value = nodeAttribute.f(); - ASSERT(weights.count() == 1 && "Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK( + weights.count() == 1, MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); std::memcpy(weights.values, &value, sizeof(float)); } else if (nodeAttribute.name() == "value_floats") @@ -245,14 +240,16 @@ Status ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& n std::vector values{nodeAttribute.floats().begin(), nodeAttribute.floats().end()}; int64_t valueSize = values.size(); weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::FLOAT, {1, {valueSize}}); - ASSERT(weights.count() == values.size() && "Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(weights.count() == values.size(), + MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); std::memcpy(weights.values, values.data(), weights.count() * sizeof(float)); } else if (nodeAttribute.name() == "value_int") { weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::INT64, {0, {}}); int64_t value = nodeAttribute.i(); - ASSERT(weights.count() == 1 && "Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK( + weights.count() == 1, MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); std::memcpy(weights.values, &value, sizeof(int64_t)); } else if (nodeAttribute.name() == "value_ints") @@ -260,25 +257,26 @@ Status ModelRefitter::refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& n std::vector values{nodeAttribute.ints().begin(), nodeAttribute.ints().end()}; int64_t valueSize = values.size(); weights = mWeightsContext.createTempWeights(::ONNX_NAMESPACE::TensorProto::INT64, {1, {valueSize}}); - ASSERT(weights.count() == values.size() && "Failed to import Constant 
node.", ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(weights.count() == values.size(), + MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); std::memcpy(weights.values, values.data(), weights.count() * sizeof(int64_t)); } else { ::ONNX_NAMESPACE::TensorProto const& onnx_weights_tensor = nodeAttribute.t(); - ASSERT(mWeightsContext.convertOnnxWeights(onnx_weights_tensor, &weights) && "Failed to import Constant node.", - ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(mWeightsContext.convertOnnxWeights(onnx_weights_tensor, &weights), + MAKE_ERROR("Failed to import Constant node.", ErrorCode::kUNSUPPORTED_NODE)); } - ASSERT(mRefitter->setNamedWeights(node.output(0).c_str(), std::move(weights)) && "Failed to set named weights", - ErrorCode::kREFIT_FAILED); + ONNXTRT_CHECK(mRefitter->setNamedWeights(node.output(0).c_str(), std::move(weights)), + MAKE_ERROR("Failed to set named weights", ErrorCode::kREFIT_FAILED)); ++successfullyRefittedWeights; - return Status::success(); } -Status ModelRefitter::refitOnnxBatchNormNode( +void ModelRefitter::refitOnnxBatchNormNode( ::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph) { - ASSERT(node.input().size() == 5 && "BatchNorm node does not have five required inputs.", ErrorCode::kINVALID_NODE); + ONNXTRT_CHECK(node.input().size() == 5, + MAKE_ERROR("BatchNorm node does not have five required inputs.", ErrorCode::kINVALID_NODE)); std::vector batchNormInputs; // The following looping construct is due to the fact that some tensors // might be shared among the BatchNorm's inputs @@ -290,8 +288,8 @@ Status ModelRefitter::refitOnnxBatchNormNode( if (inputNames.at(inputIdx) == initializer.name()) { ShapedWeights weights; - ASSERT(mWeightsContext.convertOnnxWeights(initializer, &weights) && "Failed to import initializer.", - ErrorCode::kUNSUPPORTED_NODE); + ONNXTRT_CHECK(mWeightsContext.convertOnnxWeights(initializer, &weights), + MAKE_ERROR("Failed to import initializer.", ErrorCode::kUNSUPPORTED_NODE)); weights.name = initializer.name().c_str(); batchNormInputs.push_back(std::move(weights)); break; @@ -304,9 +302,9 @@ Status ModelRefitter::refitOnnxBatchNormNode( // we must have already refitted the weights directly in refitOnnxGraph() if (batchNormInputs.size() < 4) { - return Status::success(); + return; } - ValueOrStatus batchnormRefittedWeights{0}; + size_t batchnormRefittedWeights{0}; auto const scaleType = batchNormInputs.at(0).type; bool const typesEqual = scaleType == batchNormInputs.at(1).type && scaleType == batchNormInputs.at(2).type && scaleType == batchNormInputs.at(3).type; @@ -314,34 +312,21 @@ Status ModelRefitter::refitOnnxBatchNormNode( { batchnormRefittedWeights = batchnormWeightRefitter(node, batchNormInputs, QuickCast()); - if (batchnormRefittedWeights.is_error()) - { - return batchnormRefittedWeights.error(); - } } else if (typesEqual && scaleType == ::ONNX_NAMESPACE::TensorProto::BFLOAT16) { batchnormRefittedWeights = batchnormWeightRefitter(node, batchNormInputs, QuickCast()); - if (batchnormRefittedWeights.is_error()) - { - return batchnormRefittedWeights.error(); - } } else { // Do calculations in FP32, possibly promoting/demoting arithmetic types of some operands. 
batchnormRefittedWeights = batchnormWeightRefitter( node, batchNormInputs, [this](ShapedWeights const& w) { return mWeightsContext.getFP32Values(w); }); - if (batchnormRefittedWeights.is_error()) - { - return batchnormRefittedWeights.error(); - } } - successfullyRefittedWeights += batchnormRefittedWeights.value(); - return Status::success(); + successfullyRefittedWeights += batchnormRefittedWeights; } -Status ModelRefitter::refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node) +void ModelRefitter::refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node) { size_t thenGraphOutputSize{}; size_t elseGraphOutputSize{}; @@ -350,44 +335,40 @@ Status ModelRefitter::refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node) if (attr.name() == "then_branch") { ::ONNX_NAMESPACE::GraphProto const& thenGraph = static_cast<::ONNX_NAMESPACE::GraphProto const&>(attr.g()); - CHECK_STATUS(refitOnnxGraph(thenGraph)); + refitOnnxGraph(thenGraph); thenGraphOutputSize = thenGraph.output_size(); } else if (attr.name() == "else_branch") { ::ONNX_NAMESPACE::GraphProto const& elseGraph = static_cast<::ONNX_NAMESPACE::GraphProto const&>(attr.g()); - CHECK_STATUS(refitOnnxGraph(elseGraph)); + refitOnnxGraph(elseGraph); elseGraphOutputSize = elseGraph.output_size(); } } // Number of outputs are the same between the two branches. - ASSERT(thenGraphOutputSize == elseGraphOutputSize - && "then/else subgraphs within the IF node should have the same number of outputs", - ErrorCode::kREFIT_FAILED); - - return Status::success(); + ONNXTRT_CHECK(thenGraphOutputSize == elseGraphOutputSize, + MAKE_ERROR( + "then/else subgraphs within the IF node should have the same number of outputs", ErrorCode::kREFIT_FAILED)); } -Status ModelRefitter::refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node) +void ModelRefitter::refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node) { ::ONNX_NAMESPACE::GraphProto const& body = static_cast<::ONNX_NAMESPACE::GraphProto const&>(node.attribute(0).g()); - CHECK_STATUS(refitOnnxGraph(body)); - return Status::success(); + refitOnnxGraph(body); } -Status ModelRefitter::refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node) +void ModelRefitter::refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node) { for (auto const& attr : node.attribute()) { if (attr.name() == "body") { ::ONNX_NAMESPACE::GraphProto const& body = static_cast<::ONNX_NAMESPACE::GraphProto const&>(attr.g()); - CHECK_STATUS(refitOnnxGraph(body)); + refitOnnxGraph(body); break; } } - return Status::success(); } bool ModelRefitter::refitFromBytes( @@ -401,20 +382,10 @@ bool ModelRefitter::refitFromBytes( mWeightsContext.setOnnxFileLocation(modelPath); } - Status status = deserializeOnnxModel(serializedOnnxModel, serializedOnnxModelSize, &onnx_model); - if (status.is_error()) - { - mErrors.push_back(status); - return false; - } + deserializeOnnxModel(serializedOnnxModel, serializedOnnxModelSize, &onnx_model); refittableWeights = getRefittableWeights(); - status = refitOnnxWeights(onnx_model); - if (status.is_error()) - { - mErrors.push_back(status); - return false; - } + refitOnnxWeights(onnx_model); return true; } ONNXTRT_CATCH_LOG(mLogger) @@ -428,22 +399,11 @@ bool ModelRefitter::refitFromFile(char const* onnxModelFile) noexcept // Keep track of the absolute path to the ONNX file. 
mWeightsContext.setOnnxFileLocation(onnxModelFile); - Status status = deserializeOnnxModelFile(onnxModelFile, onnx_model); - if (status.is_error()) - { - mErrors.push_back(status); - return false; - } - + deserializeOnnxModelFile(onnxModelFile, onnx_model); refittableWeights = getRefittableWeights(); if (!refittableWeights.empty()) { - status = refitOnnxWeights(onnx_model); - if (status.is_error()) - { - mErrors.push_back(status); - return false; - } + refitOnnxWeights(onnx_model); } return true; } diff --git a/ModelRefitter.hpp b/ModelRefitter.hpp index d2a78ca..5b656f0 100644 --- a/ModelRefitter.hpp +++ b/ModelRefitter.hpp @@ -60,17 +60,17 @@ class ModelRefitter : public nvonnxparser::IParserRefitter //! TConvertFunc is a functor for converting ShapedWeights to an array of type T. //! It should return a T*. template - ValueOrStatus batchnormWeightRefitter( + size_t batchnormWeightRefitter( ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs, TConvertFunc&& f); - Status refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& onnx_model); - Status refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph); - Status refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph); - Status refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName); - Status refitOnnxBatchNormNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph); - Status refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node); - Status refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node); - Status refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node); + void refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& onnx_model); + void refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph); + void refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph); + void refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName); + void refitOnnxBatchNormNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph); + void refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node); + void refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node); + void refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node); public: ModelRefitter(nvinfer1::IRefitter* refitter, nvinfer1::ILogger* logger) diff --git a/OnnxAttrs.cpp b/OnnxAttrs.cpp index 60ac5b1..0733768 100644 --- a/OnnxAttrs.cpp +++ b/OnnxAttrs.cpp @@ -129,9 +129,9 @@ onnx2trt::ShapedWeights OnnxAttrs::get(std::string cons std::string extName = this->at(key)->ref_attr_name(); bool isExtAttr = isExternalAttribute(extName, mCtx); - ::ONNX_NAMESPACE::TensorProto const& onnx_weights_tensor = isExtAttr ? mCtx->localFunctionStack().back().second.at(extName)->t() : this->at(key)->t(); + ::ONNX_NAMESPACE::TensorProto const& onnxTensor = isExtAttr ? mCtx->localFunctionStack().back().second.at(extName)->t() : this->at(key)->t(); onnx2trt::ShapedWeights weights; - bool success = mCtx->getWeightsContext().convertOnnxWeights(onnx_weights_tensor, &weights); + bool success = mCtx->getWeightsContext().convertOnnxWeights(onnxTensor, &weights, true); if (!success) { throw std::runtime_error{"Unable to convert ONNX weights"}; diff --git a/README.md b/README.md index 6962df2..ed1474f 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia. 
## Supported TensorRT Versions
-Development on the this branch is for the latest version of [TensorRT 10.2](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on this branch is for the latest version of [TensorRT 10.4](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
For previous versions of TensorRT, refer to their respective branches.
@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
### Dependencies
- [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
- - [TensorRT 10.2](https://developer.nvidia.com/tensorrt)
- - [TensorRT 10.2 open source libaries] (https://github.com/NVIDIA/TensorRT/)
+ - [TensorRT 10.4](https://developer.nvidia.com/tensorrt)
+ - [TensorRT 10.4 open source libraries](https://github.com/NVIDIA/TensorRT/)
### Building
@@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options
Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.
-TensorRT 10.1 supports ONNX release 1.16.0. Install it with:
+TensorRT 10.4 supports ONNX release 1.16.0. Install it with:
    python3 -m pip install onnx==1.16.0
diff --git a/ShapeTensor.cpp b/ShapeTensor.cpp
index f177136..ffc6bc7 100644
--- a/ShapeTensor.cpp
+++ b/ShapeTensor.cpp
@@ -542,7 +542,7 @@ nvinfer1::ISliceLayer* addSlice(ImporterContext* ctx, nvinfer1::ITensor& data, c
constexpr int32_t minDim = std::numeric_limits<int32_t>::min();
constexpr int32_t maxDim = std::numeric_limits<int32_t>::max();
nvinfer1::ISliceLayer* slice = N_CHECK(ctx->network()->addSlice(data,
- shapeTensorToDims(starts, "slice start", 0, maxDim), shapeTensorToDims(sizes, "slice size", 0, maxDim),
+ shapeTensorToDims(starts, "slice start", minDim, maxDim), shapeTensorToDims(sizes, "slice size", 0, maxDim),
shapeTensorToDims(strides, "slide strides", minDim, maxDim)));
setShapeInputIfDynamic(ctx, slice, 1, starts);
setShapeInputIfDynamic(ctx, slice, 2, sizes);
diff --git a/Status.hpp b/Status.hpp
index 98c0909..2af35a0 100644
--- a/Status.hpp
+++ b/Status.hpp
@@ -203,6 +203,7 @@ static std::ostream& operator<<(std::ostream& stream, nvinfer1::DataType const&
case nvinfer1::DataType::kBOOL: return stream << "bool";
case nvinfer1::DataType::kFP8: return stream << "float8";
case nvinfer1::DataType::kINT4: return stream << "int4";
+ default: throw std::runtime_error("Unknown dtype");
}
}
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 700f447..d390b37 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -2,6 +2,14 @@
# ONNX-TensorRT Changelog
+# TensorRT 10.4 GA Release - 2024-9-5
+For more details, see the 10.4 GA release notes.
+
+- Added support for tensor `axes` for `Pad` operations
+- Added support for `BlackmanWindow`, `HammingWindow`, and `HannWindow` operations
+- Improved error handling in `IParserRefitter`
+- Fixed kernel shape inference in multi-input convolutions
+
# TensorRT 10.3 GA Release - 2024-8-7
For more details, see the 10.3 GA release notes.
@@ -14,13 +22,14 @@ For more details, see the 10.2 GA release notes.
- Improved error handling with new macros and classes
- Minor changes to op importers for `GRU` and `Squeeze`
-# TensorRT 10.1 GA Release - 2024-6-17
+# TensorRT 10.1 GA Release - 2024-6-10
For more details, see the 10.1 GA release notes.
- Added `supportsModelV2` API - Added support for `DeformConv` operation - Added support for `PluginV3` TensorRT Plugins - Marked all IParser and IParserRefitter APIs as `noexcept` +- Shape inputs can be passed to custom ops supported by `IPluginV3`-based plugins by indicating the input indices to be interpreted as shape inputs by a node attribute named `tensorrt_plugin_shape_input_indices`. # TensorRT 10.0 GA Release - 2024-4-25 For more details, see the 10.0 GA release notes. diff --git a/docs/operators.md b/docs/operators.md index 0a1bcfa..170fbb1 100644 --- a/docs/operators.md +++ b/docs/operators.md @@ -2,7 +2,7 @@ # Supported ONNX Operators -TensorRT 10.0 supports operators in the inclusive range of opset 9 to opset 20. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below. +TensorRT 10.4 supports operators in the inclusive range of opset 9 to opset 20. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below. TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, INT32, INT64, FP8, INT8, INT4, UINT8, and BOOL @@ -36,7 +36,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA | BitwiseNot | N | | BitwiseOr | N | | BitwiseXor | N | -| BlackmanWindow | N | +| BlackmanWindow | Y | | Cast | Y | FP32, FP16, BF16, INT32, INT64, UINT8, BOOL | | | CastLike | Y | FP32, FP16, BF16, INT32, INT64, UINT8, BOOL | | | Ceil | Y | FP32, FP16, BF16 | @@ -85,8 +85,8 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA | GridSample | Y | FP32, FP16 | Input must be 4D input. 
| GroupNormalization | Y | FP32, FP16, BF16 | | GRU | Y | FP32, FP16, BF16 | For bidirectional GRUs, activation functions must be the same for both the forward and reverse pass -| HammingWindow | N | -| HannWindow | N | +| HammingWindow | Y | +| HannWindow | Y | | HardSigmoid | Y | FP32, FP16, BF16 | | HardSwish | Y | FP32, FP16, BF16 | | Hardmax | Y | FP32, FP16, BF16 | `axis` dimension of input must be a build-time constant @@ -132,7 +132,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA | OptionalGetElement | N | | OptionalHasElement | N | | Or | Y | BOOL | -| Pad | Y | FP32, FP16, BF16, INT32, INT64 | `axes` must be an initializer | +| Pad | Y | FP32, FP16, BF16, INT32, INT64 | | ParametricSoftplus | Y | FP32, FP16, BF16 | | Pow | Y | FP32, FP16, BF16 | | PRelu | Y | FP32, FP16, BF16 | @@ -184,7 +184,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA | Sin | Y | FP32, FP16, BF16 | | Sinh | Y | FP32, FP16, BF16 | | Size | Y | FP32, FP16, BF16, INT32, INT64, BOOL | -| Slice | Y | FP32, FP16, BF16, INT32, INT64, BOOL | +| Slice | Y | FP32, FP16, BF16, INT32, INT64, BOOL | | Softmax | Y | FP32, FP16, BF16 | | SoftmaxCrossEntropyLoss | N | | Softplus | Y | FP32, FP16, BF16 | diff --git a/importerUtils.cpp b/importerUtils.cpp index c130889..1ec5b3e 100644 --- a/importerUtils.cpp +++ b/importerUtils.cpp @@ -3,7 +3,6 @@ */ #include "importerUtils.hpp" -#include "NvInferSafeRuntime.h" #include "OnnxAttrs.hpp" #include "bfloat16.hpp" #include @@ -877,15 +876,6 @@ nvinfer1::IPluginCreatorInterface* importPluginCreator(ImporterContext* ctx, std creator = pluginRegistry.getCreator(pluginName.c_str(), pluginVersion.c_str(), pluginNamespace.c_str()); #endif // ENABLE_STD_PLUGIN -#if ENABLE_SAFE_PLUGIN - auto safetyPluginRegistry = nvinfer1::getBuilderSafePluginRegistry(nvinfer1::EngineCapability::kSAFETY); - if (creator == nullptr && safetyPluginRegistry != nullptr) - { - creator = safetyPluginRegistry->getPluginCreator( - pluginName.c_str(), pluginVersion.c_str(), pluginNamespace.c_str()); - } -#endif // ENABLE_SAFE_PLUGIN - // Do not perform a N_CHECK here as a plugin not being found is a valid case. It is up to the callers to handle the // nullptr correctly. 
return creator; @@ -1207,8 +1197,8 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); } - auto scale_weights = inputs.at(1).weights(); - auto bias_weights = inputs.at(2).weights(); + auto scaleWeights = inputs.at(1).weights(); + auto biasWeights = inputs.at(2).weights(); OnnxAttrs attrs(node, ctx); float epsilon = attrs.get("epsilon", 1e-5F); int32_t const relu{0}; // the ONNX instance norm op does not use the relu parameter @@ -1220,12 +1210,12 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE std::vector f; // get the values of constant inputs and cast them to float32 - float const* scaleValues = ctx->getWeightsContext().getFP32Values(scale_weights); - float const* biasValues = ctx->getWeightsContext().getFP32Values(bias_weights); + float const* scaleValues = ctx->getWeightsContext().getFP32Values(scaleWeights); + float const* biasValues = ctx->getWeightsContext().getFP32Values(biasWeights); f.emplace_back("epsilon", &epsilon, nvinfer1::PluginFieldType::kFLOAT32, 1); - f.emplace_back("scales", scaleValues, nvinfer1::PluginFieldType::kFLOAT32, scale_weights.count()); - f.emplace_back("bias", biasValues, nvinfer1::PluginFieldType::kFLOAT32, bias_weights.count()); + f.emplace_back("scales", scaleValues, nvinfer1::PluginFieldType::kFLOAT32, scaleWeights.count()); + f.emplace_back("bias", biasValues, nvinfer1::PluginFieldType::kFLOAT32, biasWeights.count()); f.emplace_back("relu", &relu, nvinfer1::PluginFieldType::kINT32, 1); f.emplace_back("alpha", &alpha, nvinfer1::PluginFieldType::kFLOAT32, 1); @@ -1335,6 +1325,26 @@ NodeImportResult normalizationHelper(ImporterContext* ctx, const ::ONNX_NAMESPAC return {{output}}; } +Status normalizeAxes(ShapeTensor& axes, int32_t const rank) +{ + ASSERT(axes.allValuesKnown() && "Axes should not contain unknown values.", ErrorCode::kINTERNAL_ERROR); + std::vector newAxes; + newAxes.reserve(axes.size()); + for (int64_t axis : axes) + { + ASSERT((-rank <= axis && axis < rank) && "Axis must be in the range of [-rank, rank-1].", + ErrorCode::kINVALID_VALUE); + // "Negative value means counting dimensions from the back." 
+ if (axis < 0) + { + axis += rank; + } + newAxes.push_back(axis); + } + axes = ShapeTensor(1, std::move(newAxes)); + return Status::success(); +} + nvinfer1::Dims insertDimension(nvinfer1::Dims const& dims, int const axis, int const value) { if (axis >= nvinfer1::Dims::MAX_DIMS || dims.nbDims >= nvinfer1::Dims::MAX_DIMS) @@ -1743,22 +1753,27 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No std::vector& inputs) { ASSERT(inputs.size() >= 2 && "Convolution require at least 2 inputs.", ErrorCode::kUNSUPPORTED_NODE); - nvinfer1::ITensor* input_tensor_ptr = &convertToTensor(inputs.at(0), ctx); - nvinfer1::Dims const input_dims = input_tensor_ptr->getDimensions(); - - nvinfer1::Dims dims = input_dims; + nvinfer1::ITensor* input = &convertToTensor(inputs.at(0), ctx); + nvinfer1::Dims dims = input->getDimensions(); bool needToExpandDims = (dims.nbDims == 3); if (needToExpandDims) { // Expand spatial dims from 1D to 2D - const std::vector axes{3}; - input_tensor_ptr = unsqueezeTensor(ctx, node, *input_tensor_ptr, axes); - dims = input_tensor_ptr->getDimensions(); + std::vector const axes{3}; + input = unsqueezeTensor(ctx, node, *input, axes); + dims = input->getDimensions(); } auto const nbSpatialDims = dims.nbDims - 2; - nvinfer1::Dims filter_dim; - filter_dim.nbDims = nbSpatialDims; + nvinfer1::Dims kernelDims; + kernelDims.nbDims = nbSpatialDims; + + // Populate spatial dims from the shape of the convolution weights. + for (int32_t i = 1; i <= nbSpatialDims; ++i) + { + kernelDims.d[nbSpatialDims - i] = inputs.at(1).shape().d[inputs.at(1).shape().nbDims - i]; + } + nvinfer1::Dims strides = makeDims(nbSpatialDims, 1); nvinfer1::Dims begPadding = makeDims(nbSpatialDims, 0); nvinfer1::Dims endPadding = makeDims(nbSpatialDims, 0); @@ -1766,49 +1781,49 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No nvinfer1::PaddingMode paddingMode; bool excludePadding{false}; CHECK_STATUS(getKernelParams( - ctx, node, &filter_dim, &strides, &begPadding, &endPadding, paddingMode, excludePadding, &dilations)); + ctx, node, &kernelDims, &strides, &begPadding, &endPadding, paddingMode, excludePadding, &dilations)); auto const nChannel = dims.d[1]; auto const K = inputs.at(1).shape().d[0]; auto const C = inputs.at(1).shape().d[1]; - auto kernel_weights = ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT); - auto bias_weights = ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT); + auto kernelWeights = ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT); + auto biasWeights = ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT); - auto const checkSpatialDims = [&nbSpatialDims, &filter_dim](nvinfer1::Dims const& dims) { + auto const checkSpatialDims = [&nbSpatialDims, &kernelDims](nvinfer1::Dims const& dims) { // Check that the number of spatial dimensions and the kernel shape matches up. 
if (nbSpatialDims != dims.nbDims - 2) { return false; } - return std::equal(filter_dim.d, filter_dim.d + nbSpatialDims, dims.d + dims.nbDims - nbSpatialDims); + return std::equal(kernelDims.d, kernelDims.d + nbSpatialDims, dims.d + dims.nbDims - nbSpatialDims); }; - nvinfer1::ITensor* kernel_tensor_ptr{nullptr}; - nvinfer1::ITensor* bias_tensor_ptr{nullptr}; + nvinfer1::ITensor* kernelTensor{nullptr}; + nvinfer1::ITensor* biasTensor{nullptr}; if (inputs.at(1).is_tensor()) { - kernel_tensor_ptr = &convertToTensor(inputs.at(1), ctx); + kernelTensor = &convertToTensor(inputs.at(1), ctx); if (needToExpandDims) { // Expand spatial dims from 1D to 2D std::vector const axes{3}; - kernel_tensor_ptr = unsqueezeTensor(ctx, node, *kernel_tensor_ptr, axes); - ASSERT(kernel_tensor_ptr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + kernelTensor = unsqueezeTensor(ctx, node, *kernelTensor, axes); + ASSERT(kernelTensor && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); } - ASSERT(checkSpatialDims(kernel_tensor_ptr->getDimensions()) + ASSERT(checkSpatialDims(kernelTensor->getDimensions()) && "The input tensor shape misaligns with the input kernel shape.", ErrorCode::kUNSUPPORTED_NODE); } else { - kernel_weights = inputs.at(1).weights(); + kernelWeights = inputs.at(1).weights(); if (needToExpandDims) { - kernel_weights.shape.nbDims = 4; - kernel_weights.shape.d[3] = 1; + kernelWeights.shape.nbDims = 4; + kernelWeights.shape.d[3] = 1; } - ASSERT_NODE(checkSpatialDims(kernel_weights.shape), + ASSERT_NODE(checkSpatialDims(kernelWeights.shape), "The input tensor shape misaligns with the input kernel shape.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); } @@ -1817,11 +1832,11 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No { if (inputs.at(2).is_weights()) { - bias_weights = inputs.at(2).weights(); + biasWeights = inputs.at(2).weights(); } else { - bias_tensor_ptr = &convertToTensor(inputs.at(2), ctx); + biasTensor = &convertToTensor(inputs.at(2), ctx); } } @@ -1832,7 +1847,7 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No ErrorCode::kINVALID_NODE); nvinfer1::IConvolutionLayer* layer - = N_CHECK(ctx->network()->addConvolutionNd(*input_tensor_ptr, K, filter_dim, kernel_weights, bias_weights)); + = N_CHECK(ctx->network()->addConvolutionNd(*input, K, kernelDims, kernelWeights, biasWeights)); ASSERT_NODE(layer, "Failed to add the Convolution layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); layer->setStrideNd(strides); layer->setPaddingMode(paddingMode); @@ -1842,13 +1857,13 @@ NodeImportResult convMultiInput(ImporterContext* ctx, const ::ONNX_NAMESPACE::No layer->setNbGroups(ngroup); // Set dynamic weights - if (kernel_tensor_ptr) + if (kernelTensor) { - layer->setInput(1, *kernel_tensor_ptr); + layer->setInput(1, *kernelTensor); } - if (bias_tensor_ptr) + if (biasTensor) { - layer->setInput(2, *bias_tensor_ptr); + layer->setInput(2, *biasTensor); } ctx->registerLayer(layer, node); @@ -2332,4 +2347,44 @@ nvinfer1::IEinsumLayer* parseGraphWithMoreInputs(ImporterContext* ctx, ::ONNX_NA return einsumLayer; } +nvinfer1::ITensor* generateWindow(ImporterContext* ctx, nvinfer1::ITensor* N) +{ + auto shapeOfN = ShapeTensor(*N, 0); + nvinfer1::IFillLayer* layer = N_CHECK(addFill(ctx, convertTo1D(ctx, shapeOfN), nvinfer1::FillOperation::kLINSPACE)); + layer->setAlpha(0.0F); + layer->setBeta(1.0F); + auto* fillOutput = N_CHECK(layer->getOutput(0)); + return fillOutput; +} + +nvinfer1::ITensor* 
windowHelper(ImporterContext* ctx, float numerator, nvinfer1::ITensor* n, nvinfer1::ITensor* N,
+ nvinfer1::UnaryOperation op, int32_t periodic)
+{
+ auto* numeratorTensor = N_CHECK(addConstantScalar(ctx, numerator, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT,
+ nvinfer1::Dims{1, {1}})->getOutput(0));
+ auto numeratorLayer
+ = N_CHECK(ctx->network()->addElementWise(*numeratorTensor, *n, nvinfer1::ElementWiseOperation::kPROD));
+ auto numeratorOutput = N_CHECK(numeratorLayer->getOutput(0));
+
+ // If periodic is 0, subtract 1 from the denominator (N)
+ if (periodic == 0)
+ {
+ auto* one = N_CHECK(addConstantScalar(ctx, 1, ::ONNX_NAMESPACE::TensorProto_DataType_INT32)->getOutput(0));
+ one = castHelper(ctx, one, N->getType());
+ auto minusOne = N_CHECK(ctx->network()->addElementWise(*N, *one, nvinfer1::ElementWiseOperation::kSUB));
+ N = N_CHECK(minusOne->getOutput(0));
+ }
+
+ auto NFloat = N_CHECK(castHelper(ctx, N, nvinfer1::DataType::kFLOAT));
+ broadcastTensors(ctx, n, NFloat);
+ auto divLayer
+ = N_CHECK(ctx->network()->addElementWise(*numeratorOutput, *NFloat, nvinfer1::ElementWiseOperation::kDIV));
+ auto divOutput = N_CHECK(divLayer->getOutput(0));
+
+ auto trigLayer = N_CHECK(ctx->network()->addUnary(*divOutput, op));
+ auto trigOutput = N_CHECK(trigLayer->getOutput(0));
+
+ return N_CHECK(trigOutput);
+}
+
} // namespace onnx2trt
diff --git a/importerUtils.hpp b/importerUtils.hpp
index 94699ac..73abe9c 100644
--- a/importerUtils.hpp
+++ b/importerUtils.hpp
@@ -259,6 +259,10 @@ nvinfer1::Dims makeDims(int nbDims, int val);
NodeImportResult normalizationHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx, std::vector& inputs);
+// Given a list of axes in the range of [-rank, rank-1], where rank is the rank
+// of the corresponding data tensor, normalize to [0, rank-1].
+Status normalizeAxes(ShapeTensor& axes, int32_t const rank);
+
// Helper function to parse activation values for LSTM nodes
std::vector parseLSTMActivationValues(std::vector const& activationTypes, std::vector const& activationValues, bool isAlpha);
@@ -430,8 +434,16 @@ Status processEllipsisAndImplicitOutput(
nvinfer1::IEinsumLayer* parseGraphWithMoreInputs(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector const& inputs, int64_t const nbInputs, std::string equation);
+// Helper function to convert TensorRT datatype enum into a human-readable string.
std::string getTrtDtypeName(nvinfer1::DataType TrtDtype);
+// Helper function to generate a Window tensor for Window operations (HannWindow, HammingWindow, BlackmanWindow).
+nvinfer1::ITensor* generateWindow(ImporterContext* ctx, nvinfer1::ITensor* N);
+
+// Helper function to handle Window generation ops. Calculates TrigOp(numerator*n / N) and returns the output tensor.
+nvinfer1::ITensor* windowHelper(ImporterContext* ctx, float numerator, nvinfer1::ITensor* n, nvinfer1::ITensor* N,
+ nvinfer1::UnaryOperation op, int32_t periodic);
+
//! Describes occurrence of a named dimension.
class NamedDimension { diff --git a/onnxOpCheckers.cpp b/onnxOpCheckers.cpp index f3c7f9f..103df3a 100644 --- a/onnxOpCheckers.cpp +++ b/onnxOpCheckers.cpp @@ -17,7 +17,6 @@ #include "importerUtils.hpp" #include -#include #include #include #include @@ -175,6 +174,8 @@ DEFINE_OP_CHECKER(BatchNormalization) ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); } +DEFINE_OP_EMPTY_CHECKER(BlackmanWindow) + DEFINE_OP_CHECKER(Cast) { OnnxAttrs attrs(node, ctx); @@ -359,6 +360,10 @@ DEFINE_OP_CHECKER(GRU) } } +DEFINE_OP_EMPTY_CHECKER(HammingWindow) + +DEFINE_OP_EMPTY_CHECKER(HannWindow) + DEFINE_OP_EMPTY_CHECKER(Hardmax) DEFINE_OP_EMPTY_CHECKER(HardSigmoid) @@ -1046,11 +1051,6 @@ DEFINE_OP_CHECKER(Bernoulli) STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); } -DEFINE_OP_CHECKER(BlackmanWindow) -{ - STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); -} - DEFINE_OP_CHECKER(CenterCropPad) { STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); @@ -1061,16 +1061,6 @@ DEFINE_OP_CHECKER(DynamicQuantizeLinear) STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); } -DEFINE_OP_CHECKER(HammingWindow) -{ - STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); -} - -DEFINE_OP_CHECKER(HannWindow) -{ - STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); -} - DEFINE_OP_CHECKER(NegativeLogLikelihoodLoss) { STATIC_CHECK(false, ErrorCode::kUNSUPPORTED_NODE, node, errors, nodeIndex); diff --git a/onnxOpImporters.cpp b/onnxOpImporters.cpp index fa37ec7..45d19f1 100644 --- a/onnxOpImporters.cpp +++ b/onnxOpImporters.cpp @@ -2,7 +2,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include "onnxOpImporters.hpp" +#if defined(_MSC_VER) +#define _USE_MATH_DEFINES +#endif +#include + #include "ConditionalHelpers.hpp" #include "LoopHelpers.hpp" #include "ModelImporter.hpp" @@ -15,10 +19,10 @@ #include "bfloat16.hpp" #include "half.h" #include "importerUtils.hpp" +#include "onnxOpImporters.hpp" #include // For std::min, std::max #include -#include #include // For std::memcpy, std::memset #include #include @@ -382,6 +386,63 @@ DEFINE_BUILTIN_OP_IMPORTER(BatchNormalization) ShapedWeights::empty(::ONNX_NAMESPACE::TensorProto::FLOAT), combinedBias.getName(), combinedScale.getName()); } +DEFINE_BUILTIN_OP_IMPORTER(BlackmanWindow) +{ + + /*** + + Operation returns a window vector, where + + Y[n] = 0.42 - 0.5cos(2pi*n / N) + 0.08cos(4pi*n / N) + + Where N is the window length, and n is each element in the window. + + Note that if `periodic == 0`, the denominator becomes N - 1. + + This can be represented by creating a range 'n' from 0 -> N, and performing the operations elementwise. 
+ + ***/ + + OnnxAttrs attrs(node, ctx); + int32_t outputDtype = attrs.get("output_datatype", 1); + int32_t periodic = attrs.get("periodic", 1); + ASSERT_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE); + + constexpr float alpha = 0.42F; + constexpr float beta = 0.5F; + constexpr float gamma = 0.08F; + + auto* N = &convertToTensor(inputs.at(0), ctx); + ASSERT_NODE( + N->getDimensions().nbDims == 0, "Window length must be a scalar!", node, nodeIdx, ErrorCode::kINVALID_NODE); + auto* window = generateWindow(ctx, N); + + auto lhsCosOutput = windowHelper(ctx, 2.F * M_PI, window, N, nvinfer1::UnaryOperation::kCOS, periodic); + + auto betaTensor = N_CHECK(addConstantScalar(ctx, beta, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, + nvinfer1::Dims{1, {1}})->getOutput(0)); + auto betaLayer + = N_CHECK(ctx->network()->addElementWise(*betaTensor, *lhsCosOutput, nvinfer1::ElementWiseOperation::kPROD)); + auto betaOutput = N_CHECK(betaLayer->getOutput(0)); + + auto rhsCosOutput = windowHelper(ctx, 4.F * M_PI, window, N, nvinfer1::UnaryOperation::kCOS, periodic); + auto gammaTensor = N_CHECK(addConstantScalar(ctx, gamma, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, + nvinfer1::Dims{1, {1}})->getOutput(0)); + auto gammaLayer + = N_CHECK(ctx->network()->addElementWise(*gammaTensor, *rhsCosOutput, nvinfer1::ElementWiseOperation::kPROD)); + auto gammaOutput = N_CHECK(gammaLayer->getOutput(0)); + + auto alphaTensor = N_CHECK(addConstantScalar(ctx, alpha, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, + nvinfer1::Dims{1, {1}})->getOutput(0)); + auto alphaMinusBeta + = N_CHECK(ctx->network()->addElementWise(*alphaTensor, *betaOutput, nvinfer1::ElementWiseOperation::kSUB)); + auto alphaMinusBetaTensor = N_CHECK(alphaMinusBeta->getOutput(0)); + + auto plusGamma = N_CHECK( + ctx->network()->addElementWise(*alphaMinusBetaTensor, *gammaOutput, nvinfer1::ElementWiseOperation::kSUM)); + RETURN_FIRST_OUTPUT(plusGamma, node, nodeIdx); +} + DEFINE_BUILTIN_OP_IMPORTER(Cast) { // Get input node. @@ -1652,6 +1713,9 @@ NodeImportResult QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE layer->setInput(2, *zeroPointInput); } + // Register the Q/DQ layer. + ctx->registerLayer(layer, node); + // Return layer output RETURN_FIRST_OUTPUT(layer, node, nodeIdx); } @@ -2492,9 +2556,11 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) // H(t) = (1 - z(t)) . h(t) + (z(t) . H(t-1)) // Constant `1` needs to be the same type as the inputs, either FP16 or FP32. - auto onnxType = zt->getType() == nvinfer1::DataType::kHALF ? ::ONNX_NAMESPACE::TensorProto::FLOAT16 - : ::ONNX_NAMESPACE::TensorProto::FLOAT; - auto* constOne = N_CHECK(addConstantScalar(ctx, 1.f, onnxType, Dims3{1, 1, 1})->getOutput(0)); + auto* constOne = zt->getType() == nvinfer1::DataType::kHALF + ? N_CHECK(addConstantScalar( + ctx, static_cast(1), ::ONNX_NAMESPACE::TensorProto::FLOAT16, Dims3{1, 1, 1}) + ->getOutput(0)) + : N_CHECK(addConstantScalar(ctx, 1.f, ::ONNX_NAMESPACE::TensorProto::FLOAT, Dims3{1, 1, 1})->getOutput(0)); nvinfer1::ITensor* Ht = getElementWiseResult(ctx, *getElementWiseResult(ctx, *getElementWiseResult(ctx, *constOne, *zt, eOp::kSUB), *ht, eOp::kPROD), *getElementWiseResult(ctx, *zt, *Ht1Output, eOp::kPROD), eOp::kSUM); @@ -2526,6 +2592,90 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) return {{outputs}}; } +DEFINE_BUILTIN_OP_IMPORTER(HammingWindow) +{ + /*** + + Operation returns a window vector, where: + + Y[n] = alpha - beta * cos(2*pi*n / N) + + Where N is the window length, and n is each element in the window. 
+ + Note that if `periodic == 0`, the denominator becomes N - 1. + + This can be represented by creating a range 'n' from 0 -> N, and performing the operations elementwise. + + Note that in the ONNX op definition alpha and beta are not provided. We will use the default values defined in ONNX: + + alpha = 25/46 + beta = 1 - alpha + + ***/ + + OnnxAttrs attrs(node, ctx); + int32_t outputDtype = attrs.get("output_datatype", 1); + int32_t periodic = attrs.get("periodic", 1); + ASSERT_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE); + + constexpr float alpha = 25.F / 46.F; + constexpr float beta = 1.F - alpha; + + auto* N = &convertToTensor(inputs.at(0), ctx); + ASSERT_NODE( + N->getDimensions().nbDims == 0, "Window length must be a scalar!", node, nodeIdx, ErrorCode::kINVALID_NODE); + auto* window = generateWindow(ctx, N); + + auto* cosOutput = windowHelper(ctx, 2.F * M_PI, window, N, nvinfer1::UnaryOperation::kCOS, periodic); + + auto betaTensor = N_CHECK(addConstantScalar(ctx, beta, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, + nvinfer1::Dims{1, {1}})->getOutput(0)); + auto betaLayer + = N_CHECK(ctx->network()->addElementWise(*betaTensor, *cosOutput, nvinfer1::ElementWiseOperation::kPROD)); + auto betaOutput = N_CHECK(betaLayer->getOutput(0)); + + auto alphaTensor = N_CHECK(addConstantScalar(ctx, alpha, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, + nvinfer1::Dims{1, {1}})->getOutput(0)); + auto alphaLayer + = N_CHECK(ctx->network()->addElementWise(*alphaTensor, *betaOutput, nvinfer1::ElementWiseOperation::kSUB)); + + RETURN_FIRST_OUTPUT(alphaLayer, node, nodeIdx); +} + +DEFINE_BUILTIN_OP_IMPORTER(HannWindow) +{ + /*** + + Operation returns a window vector, where: + + Y[n] = sin^2(pi*n / N) + + Where N is the window length, and n is each element in the window. + + Note that if `periodic == 0`, the denominator becomes N - 1. + + This can be represented by creating a range 'n' from 0 -> N, and performing the operations elementwise. + + ***/ + + OnnxAttrs attrs(node, ctx); + int32_t outputDtype = attrs.get("output_datatype", 1); + int32_t periodic = attrs.get("periodic", 1); + ASSERT_NODE(outputDtype == 1, "Output must be float32-type!", node, nodeIdx, ErrorCode::kINVALID_NODE); + + auto* N = &convertToTensor(inputs.at(0), ctx); + ASSERT_NODE( + N->getDimensions().nbDims == 0, "Window length must be a scalar!", node, nodeIdx, ErrorCode::kINVALID_NODE); + auto* window = generateWindow(ctx, N); + + auto sinOutput = windowHelper(ctx, M_PI, window, N, nvinfer1::UnaryOperation::kSIN, periodic); + + auto sinSquaredLayer + = N_CHECK(ctx->network()->addElementWise(*sinOutput, *sinOutput, nvinfer1::ElementWiseOperation::kPROD)); + + RETURN_FIRST_OUTPUT(sinSquaredLayer, node, nodeIdx); +} + DEFINE_BUILTIN_OP_IMPORTER(Hardmax) { CHECK_STATUS(notInvalidType(inputs.at(0), {"INT64", "INT32", "INT8", "UINT8", "BOOL"}, node, nodeIdx)); @@ -3795,7 +3945,6 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) float value{0.F}; nvinfer1::ITensor* valuePtr = nullptr; std::vector onnxPadding; - std::vector padAxes; if (ctx->getOpsetVersion() < 11) { @@ -3847,106 +3996,15 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) valuePtr = &convertToTensor(inputs.at(2), ctx); } } - // Opset 16 optional `axes` input. - if (inputs.size() == 4 && !inputs.at(3).isNullTensor()) - { - // Currently, `axes` input is supported only as an initializer. - if (inputs.at(3).is_weights()) - { - // `axes` is an initializer input. - CHECK_STATUS(weightsToVector(inputs.at(3).weights(), &padAxes)); - // Sanity check. 
- ASSERT_NODE(std::unordered_set(padAxes.begin(), padAxes.end()).size() == padAxes.size(), - "The input axes must have unique elements.", node, nodeIdx, ErrorCode::kINVALID_NODE); - // Accepted range of axis is [-r, r-1] where r = rank(data). - for (int32_t& axis : padAxes) - { - CHECK_STATUS(convertAxis(axis, nbDims, node, nodeIdx)); - } - } - else - { - // `axes` is a non-null tensor input. - ASSERT_NODE(false, "TensorRT does not support dynamic axes for pad!", node, nodeIdx, - ErrorCode::kUNSUPPORTED_NODE_INPUT); - } - } } - nvinfer1::ITensor* start{}; - nvinfer1::ITensor* size{}; - if (onnxPadding.empty()) - { - // `pads` is from activation instead of initializer or attributes. - nvinfer1::ITensor* onnxPaddingPtr = &convertToTensor(inputs.at(1), ctx); - ASSERT_NODE((onnxPaddingPtr->getDimensions().nbDims == 1), - "The padding input must be 1D. The rank of padding input = " << onnxPaddingPtr->getDimensions().nbDims - << ".", - node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - - // If `axes` is a non-empty input, onnxPaddingPtr needs to be updated with information from `axes`. - // Currently, `axes` is supported only if it's an initializer input. - if (!padAxes.empty()) - { - ASSERT_NODE(static_cast(onnxPaddingPtr->getDimensions().d[0]) == padAxes.size() * 2, - "pads should be twice the length of input axes i.e. " - << 2 * padAxes.size() << ", actual length is: " << onnxPaddingPtr->getDimensions().d[0], - node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + auto padAxes + = inputs.size() == 4 && !inputs.at(3).isNullTensor() ? ShapeTensor(ctx, inputs.at(3)) : iotaShapeVector(nbDims); - // onnxPaddingPtr is of the format [x1_begin, x2_begin, ..., x1_end, x2_end,...]. - ShapeTensor const paddingLen = gather(ctx, shapeOf(*onnxPaddingPtr), shapeVector(0)); - ShapeTensor const halfPaddingLen = floorDiv(ctx, paddingLen, shapeVector(2)); - // Obtain begins [x1_begin, x2_begin, ...,]. - nvinfer1::ISliceLayer* beginSliceLayer - = addSlice(ctx, *onnxPaddingPtr, shapeVector(0), halfPaddingLen, shapeVector(1)); - ctx->registerLayer(beginSliceLayer, node); - nvinfer1::ITensor* beginPads = beginSliceLayer->getOutput(0); - // Obtain ends [x1_end, x2_end, ...]. - nvinfer1::ISliceLayer* endSliceLayer - = addSlice(ctx, *onnxPaddingPtr, halfPaddingLen, halfPaddingLen, shapeVector(1)); - ctx->registerLayer(endSliceLayer, node); - nvinfer1::ITensor* endPads = endSliceLayer->getOutput(0); - - // Map axes to corresponding begins & ends and create ordered begins & ends. - std::vector padAxesLongInt(padAxes.begin(), padAxes.end()); - ShapeTensor const subscripts{axesToInterlaceSubscripts(ShapeTensor(1, std::move(padAxesLongInt)), nbDims)}; - ShapeTensor const orderedBeginPads - = interlace(ctx, similar(ctx, tensorDims, 0), ShapeTensor(*beginPads), subscripts); - ShapeTensor const orderedEndPads - = interlace(ctx, similar(ctx, tensorDims, 0), ShapeTensor(*endPads), subscripts); - - // Concatenate ordered begins & ends along zeroth dimension. 
- std::vector tensors{&orderedBeginPads.tensor(ctx), &orderedEndPads.tensor(ctx)}; - auto* concatLayer = N_CHECK(ctx->network()->addConcatenation(tensors.data(), tensors.size())); - ctx->registerLayer(concatLayer, node); - ASSERT_NODE(concatLayer, "Failed to register layer.", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - concatLayer->setAxis(0); - onnxPaddingPtr = N_CHECK(concatLayer->getOutput(0)); - } - - ASSERT_NODE(onnxPaddingPtr->getDimensions().d[0] == nbDims * 2, - "pads should be a 1D tensor of shape " << 2 * nbDims - << ", actual shape is: " << onnxPaddingPtr->getDimensions().d[0], - node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - - auto pre = ctx->network() - ->addSlice( - *onnxPaddingPtr, nvinfer1::Dims{1, {0}}, nvinfer1::Dims{1, {nbDims}}, nvinfer1::Dims{1, {1}}) - ->getOutput(0); - auto post = ctx->network() - ->addSlice(*onnxPaddingPtr, nvinfer1::Dims{1, {nbDims}}, nvinfer1::Dims{1, {nbDims}}, - nvinfer1::Dims{1, {1}}) - ->getOutput(0); - - std::vector const zerosVal(nbDims, 0); - auto const zeros = addConstant(ctx, zerosVal, ::ONNX_NAMESPACE::TensorProto::INT64, - nvinfer1::Dims{ - 1, {nbDims}})->getOutput(0); - start = getElementWiseResult(ctx, *zeros, *pre, nvinfer1::ElementWiseOperation::kSUB); - auto const totalPadding = getElementWiseResult(ctx, *pre, *post, nvinfer1::ElementWiseOperation::kSUM); - size = getElementWiseResult(ctx, shapeOf(*tensorPtr).tensor(ctx), *totalPadding, nvinfer1::ElementWiseOperation::kSUM); - } - else + ShapeTensor beginPads; + ShapeTensor endPads; + int32_t const padAxesSize = padAxes.size(); + if (!onnxPadding.empty() && padAxes.allValuesKnown()) { // The pads is from initializer or attributes. // Passthrough path for no-op padding. @@ -3955,45 +4013,64 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) LOG_VERBOSE("Found no-op pad in node: " + getNodeName(node)); RETURN_IDENTITY(inputs.at(0), node, nodeIdx); } - // If padAxes is non-empty, update onnxPadding combining information from padAxes. - if (!padAxes.empty()) - { - // Sanity check. - ASSERT_NODE(onnxPadding.size() == padAxes.size() * 2, - "Length of pads input must be twice the length of axes input.", node, nodeIdx, - ErrorCode::kINVALID_NODE); - - // Map axes to onnxPadding and build a temporary vector combining the information held by onnxPadding & - // padAxes. It is: a) of length 2 * rank(input) b) ordered by axis c) of the format [x1_begin, x2_begin, - // ..., x1_end, x2_end,...] - std::vector tempOnnxPadding(2 * nbDims, 0); - for (size_t idx = 0; idx < padAxes.size(); idx++) - { - int32_t const currAxis = padAxes[idx]; - tempOnnxPadding[currAxis] = onnxPadding[idx]; // x_begin. - tempOnnxPadding[nbDims + currAxis] = onnxPadding[padAxes.size() + idx]; // x_end. - } + // Sanity check. + ASSERT_NODE(static_cast(onnxPadding.size()) == padAxesSize * 2, + "Length of pads input must be twice the length of axes input.", node, nodeIdx, ErrorCode::kINVALID_NODE); - // Update onnxPadding to hold the combined information. 
- onnxPadding = std::move(tempOnnxPadding); - } - nvinfer1::ITensor* totalPadding = nullptr; - ASSERT_NODE(convertOnnxPadding(ctx, nbDims, onnxPadding, start, totalPadding), "Failed to convert padding!", + std::vector beginPadsVec(onnxPadding.begin(), onnxPadding.begin() + padAxesSize); + std::vector endPadsVec(onnxPadding.begin() + padAxesSize, onnxPadding.end()); + beginPads = ShapeTensor(1, std::move(beginPadsVec)); + endPads = ShapeTensor(1, std::move(endPadsVec)); + } + else + { + nvinfer1::ITensor* onnxPaddingPtr = &convertToTensor(inputs.at(1), ctx); + ASSERT_NODE((onnxPaddingPtr->getDimensions().nbDims == 1), + "The padding input must be 1D. The rank of padding input = " << onnxPaddingPtr->getDimensions().nbDims + << ".", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - size - = ctx->network() - ->addElementWise(shapeOf(*tensorPtr).tensor(ctx), *totalPadding, nvinfer1::ElementWiseOperation::kSUM) - ->getOutput(0); + ASSERT_NODE(onnxPaddingPtr->getDimensions().d[0] == padAxesSize * 2, + "pads should be twice the length of input axes i.e. " + << 2 * padAxesSize << ", actual length is: " << onnxPaddingPtr->getDimensions().d[0], + node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); + + // onnxPaddingPtr is of the format [x1_begin, x2_begin, ..., x1_end, x2_end,...]. + ShapeTensor const paddingLen = gather(ctx, shapeOf(*onnxPaddingPtr), shapeVector(0)); + ShapeTensor const halfPaddingLen = floorDiv(ctx, paddingLen, shapeVector(2)); + // Obtain begins [x1_begin, x2_begin, ...,]. + nvinfer1::ISliceLayer* beginSliceLayer + = addSlice(ctx, *onnxPaddingPtr, shapeVector(0), halfPaddingLen, shapeVector(1)); + ctx->registerLayer(beginSliceLayer, node); + beginPads = ShapeTensor{*(beginSliceLayer->getOutput(0))}; + // Obtain ends [x1_end, x2_end, ...]. + nvinfer1::ISliceLayer* endSliceLayer + = addSlice(ctx, *onnxPaddingPtr, halfPaddingLen, halfPaddingLen, shapeVector(1)); + ctx->registerLayer(endSliceLayer, node); + endPads = ShapeTensor{*(endSliceLayer->getOutput(0))}; } - // add slice node - auto const stride = makeDims(nbDims, 1); - auto const& dummy = stride; - auto* layer = N_CHECK(ctx->network()->addSlice(*tensorPtr, dummy, dummy, stride)); + if (padAxes.allValuesKnown()) + { + // gather() requires indices to be normalized if their values are known + CHECK_STATUS(normalizeAxes(padAxes, nbDims)); + } + auto axesDims = gather(ctx, tensorDims, padAxes); + ShapeTensor const zeros = similar(ctx, beginPads, 0); + ShapeTensor start = sub(ctx, zeros, beginPads); + ShapeTensor size = add(ctx, axesDims, add(ctx, beginPads, endPads)); + ShapeTensor const stride = similar(ctx, start, 1); + + auto* layer = N_CHECK(addSlice(ctx, *tensorPtr, start, size, stride)); ASSERT_NODE(layer, "Could not create padding layer", node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE); - layer->setInput(1, *start); - layer->setInput(2, *size); + if (padAxes.allValuesKnown()) + { + layer->setAxes(shapeTensorToDims(padAxes, "slice axes", -nbDims, nbDims - 1)); + } + else + { + layer->setInput(5, convertToTensor(inputs.at(3), ctx)); + } if (mode == "constant") { layer->setMode(nvinfer1::SampleMode::kFILL); @@ -5348,24 +5425,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Slice) if (axes.allValuesKnown()) { // gather() requires indices to be normalized if their values are known - std::vector newAxes; - newAxes.reserve(axes.size()); - for (int64_t axis : axes) - { - // "Accepted range is [-r, r-1] where r = rank(data)." 
- int32_t const r = dims.size(); - ASSERT_NODE((-r <= axis && axis < r), - "The range of axis must be in [-r, r-1], where r is the rank of input data. Provided axis = " - << axis << ", r = " << r << ".", - node, nodeIdx, ErrorCode::kINVALID_VALUE); - // "Negative value means counting dimensions from the back." - if (axis < 0) - { - axis += r; - } - newAxes.push_back(axis); - } - axes = ShapeTensor(1, std::move(newAxes)); + CHECK_STATUS(normalizeAxes(axes, dims.size())); } // Get dimensions of dims that correspond to axes for the computation of sizes auto const axesDims = gather(ctx, dims, axes); diff --git a/onnx_tensorrt/__init__.py b/onnx_tensorrt/__init__.py index 45acd8d..afef8f2 100644 --- a/onnx_tensorrt/__init__.py +++ b/onnx_tensorrt/__init__.py @@ -4,4 +4,4 @@ from . import backend -__version__ = "10.3.0" +__version__ = "10.4.0"
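Note for reviewers (not part of the diff): the new window importers compose an IFillLayer (kLINSPACE) range with elementwise and unary layers to evaluate the formulas quoted in their comments. For reference only, a minimal host-side C++ sketch of the same math, assuming float32 output and the default ONNX attributes; the helper below and its name are illustrative and do not appear in this change.

#include <cmath>
#include <cstdint>
#include <vector>

// Reference-only sketch: evaluates a0 - a1*cos(2*pi*n/denom) + a2*cos(4*pi*n/denom),
// where denom is N for periodic windows and N - 1 otherwise (mirroring windowHelper()).
static std::vector<float> makeWindow(int64_t N, bool periodic, float a0, float a1, float a2)
{
    float const pi = 3.14159265358979F;
    float const denom = periodic ? static_cast<float>(N) : static_cast<float>(N - 1);
    std::vector<float> y(static_cast<size_t>(N));
    for (int64_t n = 0; n < N; ++n)
    {
        float const x = static_cast<float>(n) / denom;
        y[static_cast<size_t>(n)] = a0 - a1 * std::cos(2.0F * pi * x) + a2 * std::cos(4.0F * pi * x);
    }
    return y;
}

With (a0, a1, a2) = (0.42F, 0.5F, 0.08F) this matches BlackmanWindow; (25.F/46.F, 1.F - 25.F/46.F, 0.F) matches HammingWindow; and (0.5F, 0.5F, 0.F) matches HannWindow, since sin^2(pi*x) == 0.5 - 0.5*cos(2*pi*x).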