From 6872a9473391a73b96741711d52b98c2c3e25146 Mon Sep 17 00:00:00 2001 From: Kevin Chen Date: Fri, 10 Mar 2023 08:08:06 -0800 Subject: [PATCH] ONNX-TensorRT 8.6-EA release Signed-off-by: Kevin Chen --- CMakeLists.txt | 4 +- ConditionalHelpers.cpp | 39 ++- ConditionalHelpers.hpp | 15 +- ImporterContext.cpp | 154 +++++++++- ImporterContext.hpp | 59 +++- ModelImporter.cpp | 133 ++++++--- ModelImporter.hpp | 51 +++- NvOnnxParser.h | 357 +++++++++++++++-------- README.md | 8 +- TensorOrWeights.hpp | 18 +- builtin_op_importers.cpp | 602 ++++++++++++++++++--------------------- docs/Changelog.md | 17 ++ docs/operators.md | 21 +- onnx2trt.hpp | 26 +- onnx2trt_utils.cpp | 348 +++++++++++++++------- onnx2trt_utils.hpp | 14 +- trt_utils.hpp | 7 +- 17 files changed, 1223 insertions(+), 650 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 23f9ea73..21abe1c9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,8 +28,8 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}") # Version information #-------------------------------------------------- set(ONNX2TRT_MAJOR 8) -set(ONNX2TRT_MINOR 5) -set(ONNX2TRT_PATCH 1) +set(ONNX2TRT_MINOR 6) +set(ONNX2TRT_PATCH 0) set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version") #-------------------------------------------------- diff --git a/ConditionalHelpers.cpp b/ConditionalHelpers.cpp index 8a01f0a4..1b222c91 100644 --- a/ConditionalHelpers.cpp +++ b/ConditionalHelpers.cpp @@ -15,7 +15,7 @@ using LayerName = std::string; using InputIndex = int32_t; // A SubgraphPortsMap maps either the inputs or outputs ports of each node in an ONNX graph. -using SubgraphPortsMap = std::unordered_map>; +using SubgraphPortsMap = std::unordered_map>; // An InputsMap tracks which IIfConditionalInputLayer we've added to a layer's inputs, // so that we can reuse them if needed. @@ -27,7 +27,7 @@ using InputsMap = std::unordered_mapgetName(); return layerName.compare(0, key.size(), key) == 0; }); } @@ -57,7 +57,7 @@ Status addConditionalInputLayer(IImporterContext* ctx, nvinfer1::IIfConditional* inputLayer = conditional->addInput(*input); inputsMap[name] = inputLayer; const std::string inputLayerName(name); - ctx->registerLayer(inputLayer, inputLayerName + "_InputLayer"); + ctx->registerLayer(inputLayer, inputLayerName + "_InputLayer", nullptr); // Note: Since multiple conditionals may use the same external tensor, check unique names for output tensors of // IfConditionalInputLayers to avoid tensor name duplication. ctx->registerTensor( @@ -132,9 +132,8 @@ Status addIfInputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* conditi { // Find all of the tensors entering the subgraph. // The node-names are from the ONNX context. - using NodeName = std::string; using InputIndex = int32_t; - std::unordered_map> subgraphInputsMap; + std::unordered_map> subgraphInputsMap; getSubgraphInputs(newLayers, subgraphInputsMap); // Add a ConditionalInputLayer in front of each input that is external to the subgraph. 
@@ -166,9 +165,8 @@ Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* condit } }; - using NodeName = std::string; - std::unordered_map> thenOutputs; - std::unordered_map> elseOutputs; + std::unordered_map> thenOutputs; + std::unordered_map> elseOutputs; std::vector thenReportedOutputs; getReportedOutputs(thenGraph, thenReportedOutputs); @@ -182,14 +180,9 @@ Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* condit = [](IImporterContext* ctx, std::vector& sgOutputs, SubgraphPortsMap& subgraphOutputs, ::ONNX_NAMESPACE::GraphProto const& subgraph, std::vector subgraphLayers, StringMap const& subgraphTensors) { - for (const auto& layer : subgraphLayers) + for (auto const& pair : subgraphOutputs) { - const auto layerName = layer->getName(); - auto iter = findLayer(subgraphOutputs, layerName); - if (iter != subgraphOutputs.end()) - { - sgOutputs.push_back(layer->getOutput(0)); - } + sgOutputs.push_back(pair.first); } if (sgOutputs.empty()) @@ -221,7 +214,7 @@ Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* condit for (size_t i = 0; i < elseSGOutputTensors.size(); i++) { auto* outputLayer = conditional->addOutput(*thenOutputTensors[i], *elseSGOutputTensors[i]); - ctx->registerLayer(outputLayer, std::string(conditional->getName()) + "_OutputLayer"); + ctx->registerLayer(outputLayer, std::string(conditional->getName()) + "_OutputLayer", nullptr); graphOutputs.emplace_back(outputLayer->getOutput(0)); } return Status::success(); @@ -229,7 +222,7 @@ Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* condit // Given a subgraph, find all of its external inputs/outputs (tensors entering/exiting the subgraph). Status getSubgraphTensors(const std::vector& newLayers, - std::unordered_map>& externalOutputs, bool extractOutputs, + std::unordered_map>& externalOutputs, bool extractOutputs, const std::vector* reportedOutputs = nullptr) { using NodeName = std::string; @@ -271,7 +264,7 @@ Status getSubgraphTensors(const std::vector& newLayers, }; // Retrieve the list of tensors either exiting or entering the subgraph. 
- std::unordered_map> externalPortsMap; + std::unordered_map> externalPortsMap; auto filterTensors = [&](TensorsSet const& tensors, auto getNodeAccessor) { for (nvinfer1::ILayer const* l : newLayers) { @@ -307,7 +300,7 @@ Status getSubgraphTensors(const std::vector& newLayers, } if (!reportedOutputs || prefixFound) { - externalPortsMap[tensorName].push_back(std::make_pair(nodeName, i)); + externalPortsMap[tensor].push_back(std::make_pair(nodeName, i)); } } i++; @@ -330,23 +323,23 @@ Status getSubgraphTensors(const std::vector& newLayers, { for (const Port& inPort : input.second) { - auto const nodeName = inPort.first; + auto* tensor = input.first; auto const portIndex = inPort.second; - externalOutputs[nodeName].insert(portIndex); + externalOutputs[tensor].insert(portIndex); } } return Status::success(); } Status getSubgraphOutputs(const std::vector& newLayers, - std::unordered_map>& externalOutputs, + std::unordered_map>& externalOutputs, const std::vector& reportedOutputs) { return getSubgraphTensors(newLayers, externalOutputs, true, &reportedOutputs); } Status getSubgraphInputs(const std::vector& newLayers, - std::unordered_map>& externalInputs) + std::unordered_map>& externalInputs) { return getSubgraphTensors(newLayers, externalInputs, false); } diff --git a/ConditionalHelpers.hpp b/ConditionalHelpers.hpp index fb7d6feb..e4618f9b 100644 --- a/ConditionalHelpers.hpp +++ b/ConditionalHelpers.hpp @@ -19,17 +19,16 @@ namespace onnx2trt { // Given a subgraph, find all of its external inputs (tensors entering the subgraph). -// The result is returned in `subgraphInputs`, which is a map indexed by layer-name and with values indicating a set -// of external input indices. -Status getSubgraphInputs( - const std::vector& newLayers, - std::unordered_map>& subgraphInputs); +// The result is returned in `subgraphInputs`, which is a map indexed by ITensor (a tensor entering the subgraph) and +// with values indicating a set of external input indices. +Status getSubgraphInputs(std::vector const& newLayers, + std::unordered_map>& subgraphInputs); // Given a subgraph, find all of its external outputs (tensors exiting the subgraph). -// The result is returned in `subgraphInputs`, which is a map indexed by layer-name and with values indicating a set -// of external outputs indices. +// The result is returned in `subgraphInputs`, which is a map indexed by ITensor (a tensor exiting the subgraph) and +// with values indicating a set of external outputs indices. 
Status getSubgraphOutputs(const std::vector& newLayers, - std::unordered_map>& subgraphOutputs, + std::unordered_map>& subgraphOutputs, const std::vector& reportedOutputs); // Take a snapshot of the network before and after parsing the subgraph and return a list diff --git a/ImporterContext.cpp b/ImporterContext.cpp index dfd1d684..94d52615 100644 --- a/ImporterContext.cpp +++ b/ImporterContext.cpp @@ -3,6 +3,26 @@ */ #include "ImporterContext.hpp" +#include "NvInferVersion.h" +#include + +#if !defined(_WIN32) +#include +#if defined(__linux__) +#include +#endif +#else // defined(_WIN32) +#include +#endif // !defined(_WIN32) + +#define RT_ASSERT(cond) \ + do \ + { \ + if (!(cond)) \ + { \ + throw std::runtime_error("Assertion " #cond " failed!"); \ + } \ + } while (0) namespace onnx2trt { @@ -89,7 +109,7 @@ void ImporterContext::registerTensor(TensorOrWeights tensor, std::string const& p.first->second = std::move(tensor); } -void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const& basename) +void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const& basename, ::ONNX_NAMESPACE::NodeProto const* node) { // No layer will be added for Constant nodes in ONNX. if (layer) @@ -111,6 +131,138 @@ void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const& mConstantLayers.insert({uniqueName, static_cast(layer)}); } } + if (node != nullptr) + { + processMetadata(*node, layer); + } +} + +void ImporterContext::registerLayer(nvinfer1::ILayer* layer, ::ONNX_NAMESPACE::NodeProto const& node) +{ + std::string const& basename = getNodeName(node); + registerLayer(layer, basename, &node); +} + +namespace +{ + +//! Translates a "logical" library name into an OS-dependent DSO or DLL name +std::string getOSLibraryName(char const* logicalName) +{ + std::stringstream libName; +#if defined(_WIN32) + libName << logicalName << ".dll"; +#else + libName << "lib" << logicalName << ".so." << NV_TENSORRT_SONAME_MAJOR; +#endif + return libName.str(); +} + +//! Platform-agnostic wrapper around dynamic libraries. +class DynamicLibrary +{ +public: + explicit DynamicLibrary(std::string const& name) + : mLibName{name} + { +#if defined(_WIN32) + mHandle = LoadLibraryA(name.c_str()); +#else // defined(_WIN32) + int32_t flags{RTLD_LAZY}; + mHandle = dlopen(name.c_str(), flags); +#endif // defined(_WIN32) + + if (mHandle == nullptr) + { + std::string errorStr{}; +#if !defined(_WIN32) + errorStr = std::string{" due to "} + std::string{dlerror()}; +#endif + throw std::runtime_error("Unable to open library: " + name + errorStr); + } + } + + DynamicLibrary(DynamicLibrary const&) = delete; + DynamicLibrary(DynamicLibrary const&&) = delete; + + ~DynamicLibrary() + { + try + { +#if defined(_WIN32) + RT_ASSERT(static_cast(FreeLibrary(static_cast(mHandle)))); +#else + RT_ASSERT(dlclose(mHandle) == 0); +#endif + } + catch (...) 
+ { + std::cerr << "Unable to close library: " << mLibName << std::endl; + } + } + + std::string getFullPath() const + { + RT_ASSERT(mHandle != nullptr); +#if defined(__linux__) + link_map* linkMap = nullptr; + auto const err = dlinfo(mHandle, RTLD_DI_LINKMAP, &linkMap); + RT_ASSERT(err == 0 && linkMap != nullptr && linkMap->l_name != nullptr); + return std::string{linkMap->l_name}; +#elif defined(_WIN32) + constexpr int32_t kMAX_PATH_LEN{4096}; + std::string path(kMAX_PATH_LEN, '\0'); // since C++11, std::string storage is guaranteed to be contiguous + auto const pathLen = GetModuleFileNameA(static_cast(mHandle), &path[0], kMAX_PATH_LEN); + RT_ASSERT(GetLastError() == ERROR_SUCCESS); + path.resize(pathLen); + path.shrink_to_fit(); + return path; +#else + RT_ASSERT(!"Unsupported operation: getFullPath()"); +#endif + } + +private: + std::string mLibName{}; //!< Name of the DynamicLibrary + void* mHandle{}; //!< Handle to the DynamicLibrary +}; + +//! Translates an OS-dependent DSO/DLL name into a path on the filesystem +std::string getOSLibraryPath(std::string const& osLibName) +{ + DynamicLibrary lib{osLibName}; + return lib.getFullPath(); +} + +} // namespace + +void ImporterContext::addUsedVCPluginLibrary( + ::ONNX_NAMESPACE::NodeProto const& node, char const* pluginName, char const* pluginLib) +{ + auto* ctx = this; // For logging + auto osPluginLibName = getOSLibraryName(pluginLib); + LOG_VERBOSE("Node " << getNodeName(node) << " requires plugin " << pluginName << " which is provided by " + << osPluginLibName); + mLogicalVCPluginLibraries.insert(osPluginLibName); +} + +std::vector ImporterContext::getUsedVCPluginLibraries() +{ + auto* ctx = this; // For logging +#if defined(_WIN32) || defined(__linux__) + std::vector ret; + ret.reserve(mLogicalVCPluginLibraries.size()); + for (auto const& l : mLogicalVCPluginLibraries) + { + auto osLibPath = getOSLibraryPath(l); + LOG_VERBOSE("Library " << l << " located on filesystem as " << osLibPath); + ret.emplace_back(std::move(osLibPath)); + } + return ret; +#else + LOG_WARNING("getUsedVCPluginLibraries not implemented on platform!"); + return {}; +#endif } } // namespace onnx2trt diff --git a/ImporterContext.hpp b/ImporterContext.hpp index 85b51b28..922b53b1 100644 --- a/ImporterContext.hpp +++ b/ImporterContext.hpp @@ -89,6 +89,15 @@ class ImporterContext final : public IImporterContext std::string mOnnxFileLocation; // Keep track of the directory of the parsed ONNX file std::unique_ptr mErrorWrapper; // error recorder to control TRT errors StringMap mConstantLayers; + bool mConvertINT64Logged{false}; + bool mConvertINT64OutOfBoundsLogged{false}; + bool mConvertDoubleLogged{false}; + bool mConvertDoubleOutOfBoundsLogged{false}; + nvonnxparser::OnnxParserFlags mOnnxParserFlags; // OnnxParserFlags specified by the parser + + // Logical library names for VC plugin libraries. This gets translated to library paths + // when getUsedVCPluginLibraries() is called. + std::set mLogicalVCPluginLibraries; //! Stack of names defined by nested ONNX graphs, with information about how to //! restore their associated values when popping back to the surrounding scope. 
@@ -161,7 +170,8 @@ class ImporterContext final : public IImporterContext void registerTensor( TensorOrWeights tensor, std::string const& basename, bool const checkUniqueName = false) override; - void registerLayer(nvinfer1::ILayer* layer, std::string const& basename) override; + void registerLayer(nvinfer1::ILayer* layer, std::string const& basename, ::ONNX_NAMESPACE::NodeProto const* node) override; + void registerLayer(nvinfer1::ILayer* layer, ::ONNX_NAMESPACE::NodeProto const& node) override; nvinfer1::ILogger& logger() override { @@ -266,6 +276,53 @@ class ImporterContext final : public IImporterContext return iter->second; } + void setFlags(nvonnxparser::OnnxParserFlags const& onnxParserFlags) override + { + mOnnxParserFlags = onnxParserFlags; + } + nvonnxparser::OnnxParserFlags getFlags() const override + { + return mOnnxParserFlags; + } + + virtual void addUsedVCPluginLibrary( + ::ONNX_NAMESPACE::NodeProto const& node, char const* pluginName, char const* pluginLib) final; + + virtual std::vector getUsedVCPluginLibraries() final; + + bool isConvertINT64Logged() + { + return mConvertINT64Logged; + } + void setConvertINT64Logged(bool logged) + { + mConvertINT64Logged = logged; + } + bool isConvertINT64OutOfBoundsLogged() + { + return mConvertINT64OutOfBoundsLogged; + } + void setConvertINT64OutOfBoundsLogged(bool logged) + { + mConvertINT64OutOfBoundsLogged = logged; + } + bool isConvertDoubleLogged() + { + return mConvertDoubleLogged; + } + void setConvertDoubleLogged(bool logged) + { + mConvertDoubleLogged = logged; + } + bool isConvertDoubleOutOfBoundsLogged() + { + return mConvertDoubleOutOfBoundsLogged; + } + void setConvertDoubleOutOfBoundsLogged(bool logged) + { + mConvertDoubleOutOfBoundsLogged = logged; + } + private: std::string const& generateUniqueName(std::set& namesSet, const std::string& basename) { diff --git a/ModelImporter.cpp b/ModelImporter.cpp index 720f4ce8..b280a783 100644 --- a/ModelImporter.cpp +++ b/ModelImporter.cpp @@ -20,6 +20,16 @@ namespace onnx2trt { +// Helper class and object to shutdown protobuf library upon library unload. +class ProtobufShutter { + public: + ~ProtobufShutter() + { + google::protobuf::ShutdownProtobufLibrary(); + } +}; + +static ProtobufShutter protobufShutter; // Helper for deserializing INetwork Status setTensorLocations( @@ -209,6 +219,10 @@ Status parseGraph( } } + ASSERT((node.output().size() <= static_cast(outputs.size())) + && "Node has more output tensors than TRT expected.", + ErrorCode::kINVALID_GRAPH); + // Set output names and register outputs with the context. std::ostringstream ssOutputs{}; ssOutputs << nodeName << " [" << node.op_type() << "] outputs: "; @@ -224,6 +238,20 @@ Status parseGraph( { ctx->registerTensor(std::move(output), outputName); } + // UINT8 is only allowed as network inputs and outputs. Therefore any node that produces an UINT8-typed + // output that is not also a graph output is unsupported. 
+ if (output.getType() == "UINT8") + { + bool legalUINT8 = false; + for (auto const& graphOutput : graph.output()) + { + if (graphOutput.name() == outputName) + { + legalUINT8 = true; + } + } + ASSERT(legalUINT8 && "TensorRT does not support UINT8 types for intermediate tensors!", ErrorCode::kUNSUPPORTED_NODE); + } } LOG_VERBOSE(ssOutputs.str()); } @@ -355,13 +383,13 @@ bool ModelImporter::supportsModel(void const* serialized_onnx_model, size_t seri if (status.is_error()) { - _errors.push_back(status); + mErrors.push_back(status); return false; } if (model_path) { - _importer_ctx.setOnnxFileLocation(model_path); + mImporterCtx.setOnnxFileLocation(model_path); } bool allSupported{true}; @@ -393,7 +421,7 @@ bool ModelImporter::supportsModel(void const* serialized_onnx_model, size_t seri } } } - auto* ctx = &_importer_ctx; + auto* ctx = &mImporterCtx; auto checkForInput = [&input_node, &ctx](::ONNX_NAMESPACE::NodeProto const& node) { for (auto input : node.input()) { @@ -474,25 +502,25 @@ bool ModelImporter::supportsOperator(char const* op_name) const bool ModelImporter::parseWithWeightDescriptors(void const* serialized_onnx_model, size_t serialized_onnx_model_size) { - _current_node = -1; + mCurrentNode = -1; // TODO: This function (and its overload below) could do with some cleaning, // particularly wrt error handling. // Note: We store a copy of the model so that weight arrays will persist - _onnx_models.emplace_back(); - ::ONNX_NAMESPACE::ModelProto& model = _onnx_models.back(); + mONNXModels.emplace_back(); + ::ONNX_NAMESPACE::ModelProto& model = mONNXModels.back(); bool is_serialized_as_text = false; Status status = deserialize_onnx_model(serialized_onnx_model, serialized_onnx_model_size, is_serialized_as_text, &model); if (status.is_error()) { - _errors.push_back(status); + mErrors.push_back(status); return false; } status = this->importModel(model); if (status.is_error()) { - status.setNode(_current_node); - _errors.push_back(status); + status.setNode(mCurrentNode); + mErrors.push_back(status); return false; } return true; @@ -500,7 +528,8 @@ bool ModelImporter::parseWithWeightDescriptors(void const* serialized_onnx_model bool ModelImporter::parse(void const* serialized_onnx_model, size_t serialized_onnx_model_size, const char* model_path) { - auto* const ctx = &_importer_ctx; + auto* const ctx = &mImporterCtx; + if (ctx->network()->getNbLayers() > 0) { LOG_ERROR("Parse was called with a non-empty network definition"); @@ -508,17 +537,16 @@ bool ModelImporter::parse(void const* serialized_onnx_model, size_t serialized_o } if (model_path) { - _importer_ctx.setOnnxFileLocation(model_path); + mImporterCtx.setOnnxFileLocation(model_path); } return this->parseWithWeightDescriptors(serialized_onnx_model, serialized_onnx_model_size); } -Status ModelImporter::importModel( - ::ONNX_NAMESPACE::ModelProto const& model) +Status ModelImporter::importModel(::ONNX_NAMESPACE::ModelProto const& model) { - ASSERT(!_importer_ctx.network()->hasImplicitBatchDimension() && "This version of the ONNX parser only supports TensorRT INetworkDefinitions with an explicit batch dimension. Please ensure the network was created using the EXPLICIT_BATCH NetworkDefinitionCreationFlag.", ErrorCode::kINVALID_VALUE); - auto* ctx = &_importer_ctx; - _importer_ctx.clearOpsets(); + ASSERT(!mImporterCtx.network()->hasImplicitBatchDimension() && "This version of the ONNX parser only supports TensorRT INetworkDefinitions with an explicit batch dimension. 
Please ensure the network was created using the EXPLICIT_BATCH NetworkDefinitionCreationFlag.", ErrorCode::kINVALID_VALUE); + auto* ctx = &mImporterCtx; + mImporterCtx.clearOpsets(); #if ENABLE_STD_PLUGIN // Initialize plugin registry initLibNvInferPlugins(static_cast(&ctx->logger()), ""); @@ -531,30 +559,35 @@ Status ModelImporter::importModel( // ONNX spec says that the default domain is either an empty string or is "ai.onnx". if ((domain.empty() || domain == "ai.onnx") && version < 7) { - LOG_WARNING("TensorRT supports ONNX graphs generated with at least opset 7. Models using older opsets are not guaranteed to work."); + LOG_WARNING( + "TensorRT supports ONNX graphs generated with at least opset 7. Models using older opsets are not " + "guaranteed to work."); } - _importer_ctx.addOpset(domain, version); + mImporterCtx.addOpset(domain, version); } ::ONNX_NAMESPACE::GraphProto const& graph = model.graph(); // Create a dummy tensors so that we can reserve output names. If the output names are encountered elsewhere // in the graph, the ctx will know to make the names unique. for (::ONNX_NAMESPACE::ValueInfoProto const& output : graph.output()) { - _importer_ctx.registerTensor(TensorOrWeights{}, output.name()); + mImporterCtx.registerTensor(TensorOrWeights{}, output.name()); } - _current_node = -1; - CHECK(importInputs(&_importer_ctx, graph, &_importer_ctx.tensors())); - CHECK(parseGraph(&_importer_ctx, graph, model.producer_name() == "TensorRT", &_current_node)); + // Propagate OnnxParserFlags down to the importer context. + mImporterCtx.setFlags(getFlags()); + + mCurrentNode = -1; + CHECK(importInputs(&mImporterCtx, graph, &mImporterCtx.tensors())); + CHECK(parseGraph(&mImporterCtx, graph, model.producer_name() == "TensorRT", &mCurrentNode)); - _current_node = -1; + mCurrentNode = -1; // Mark outputs defined in the ONNX model (unless tensors are user-requested) for (::ONNX_NAMESPACE::ValueInfoProto const& output : graph.output()) { - ASSERT((_importer_ctx.tensors().count(output.name())) && "The output tensor was not registered.", + ASSERT((mImporterCtx.tensors().count(output.name())) && "The output tensor was not registered.", ErrorCode::kINVALID_GRAPH); nvinfer1::ITensor* output_tensor_ptr - = &convertToTensor(_importer_ctx.tensors().at(output.name()), &_importer_ctx); + = &convertToTensor(mImporterCtx.tensors().at(output.name()), &mImporterCtx); LOG_VERBOSE("Marking " << output_tensor_ptr->getName() << " as output: " << output.name()); output_tensor_ptr->setName(output.name().c_str()); @@ -563,17 +596,19 @@ Status ModelImporter::importModel( // HACK WAR for TRT not allowing input == output // TODO: Does this break things by changing the name of the input tensor? 
output_tensor_ptr->setName(("__" + output.name()).c_str()); - output_tensor_ptr = &identity(&_importer_ctx, output_tensor_ptr).tensor(); + output_tensor_ptr = &identity(&mImporterCtx, output_tensor_ptr).tensor(); ASSERT(output_tensor_ptr && "Failed to add an Identity layer.", ErrorCode::kUNSUPPORTED_NODE); output_tensor_ptr->setName(output.name().c_str()); } - nvinfer1::ITensor** user_output = _importer_ctx.getUserOutput(output.name().c_str()); + nvinfer1::ITensor** user_output = mImporterCtx.getUserOutput(output.name().c_str()); if (!user_output) { - _importer_ctx.network()->markOutput(*output_tensor_ptr); + mImporterCtx.network()->markOutput(*output_tensor_ptr); nvinfer1::DataType output_trt_dtype; - ASSERT(convertDtype(output.type().tensor_type().elem_type(), &output_trt_dtype) && "Failed to convert ONNX date type to TensorRT data type.", ErrorCode::kUNSUPPORTED_NODE); + ASSERT(convertDtype(output.type().tensor_type().elem_type(), &output_trt_dtype) + && "Failed to convert ONNX data type to TensorRT data type.", + ErrorCode::kUNSUPPORTED_NODE); // For INT32 data type, output type must match tensor type ASSERT( (output_tensor_ptr->getType() != nvinfer1::DataType::kINT32 || output_trt_dtype == nvinfer1::DataType::kINT32) && "For INT32 tensors, the output type must also be INT32.", @@ -583,13 +618,14 @@ Status ModelImporter::importModel( } } // Return user-requested output tensors - for (auto user_output_entry : _importer_ctx.getUserOutputs()) + for (auto user_output_entry : mImporterCtx.getUserOutputs()) { std::string user_output_name = user_output_entry.first; nvinfer1::ITensor** user_output_ptr = user_output_entry.second; - ASSERT( (_importer_ctx.tensors().count(user_output_name)) && "The user-requested output was not registered.", ErrorCode::kINVALID_VALUE); - TensorOrWeights user_output = _importer_ctx.tensors().at(user_output_name); - ASSERT( (user_output.is_tensor()) && "The user-requested output must be a tensor.", ErrorCode::kINVALID_VALUE); + ASSERT((mImporterCtx.tensors().count(user_output_name)) && "The user-requested output was not registered.", ErrorCode::kINVALID_VALUE); + TensorOrWeights user_output = mImporterCtx.tensors().at(user_output_name); + ASSERT((user_output.is_tensor()) && "The user-requested output must be a tensor.", ErrorCode::kINVALID_VALUE); *user_output_ptr = &user_output.tensor(); } @@ -598,25 +634,25 @@ Status ModelImporter::importModel( // iterate over all tensors in the network and add them to "tensors" map string_map tensors; string_map layers; - for (int32_t idx = 0; idx < _importer_ctx.network()->getNbInputs(); ++idx) + for (int32_t idx = 0; idx < mImporterCtx.network()->getNbInputs(); ++idx) { - nvinfer1::ITensor* tensor = _importer_ctx.network()->getInput(idx); + nvinfer1::ITensor* tensor = mImporterCtx.network()->getInput(idx); if (tensor != nullptr) { tensors[tensor->getName()] = tensor; } } - for (int32_t idx = 0; idx < _importer_ctx.network()->getNbOutputs(); ++idx) + for (int32_t idx = 0; idx < mImporterCtx.network()->getNbOutputs(); ++idx) { - nvinfer1::ITensor* tensor = _importer_ctx.network()->getOutput(idx); + nvinfer1::ITensor* tensor = mImporterCtx.network()->getOutput(idx); if (tensor != nullptr) { tensors[tensor->getName()] = tensor; } } - for (int32_t layerIdx = 0; layerIdx < _importer_ctx.network()->getNbLayers(); ++layerIdx) + for (int32_t layerIdx = 0; layerIdx < mImporterCtx.network()->getNbLayers(); ++layerIdx) { - nvinfer1::ILayer* layer = _importer_ctx.network()->getLayer(layerIdx); + nvinfer1::ILayer* layer =
mImporterCtx.network()->getLayer(layerIdx); for (int32_t idx = 0; idx < layer->getNbInputs(); ++idx) { nvinfer1::ITensor* tensor = layer->getInput(idx); @@ -660,12 +696,21 @@ Status ModelImporter::importModel( } } + // Regenerate the plugin library list + mPluginLibraryList = ctx->getUsedVCPluginLibraries(); + mPluginLibraryListCStr.clear(); + mPluginLibraryListCStr.reserve(mPluginLibraryList.size()); + for (auto const& s : mPluginLibraryList) + { + mPluginLibraryListCStr.push_back(s.c_str()); + } + return Status::success(); } bool ModelImporter::parseFromFile(char const* onnxModelFile, int32_t verbosity) { - auto* ctx = &_importer_ctx; + auto* ctx = &mImporterCtx; // Define S_ISREG macro for Windows #if !defined(S_ISREG) @@ -690,7 +735,7 @@ bool ModelImporter::parseFromFile(char const* onnxModelFile, int32_t verbosity) } // Keep track of the absolute path to the ONNX file. - _importer_ctx.setOnnxFileLocation(onnxModelFile); + mImporterCtx.setOnnxFileLocation(onnxModelFile); int64_t const opset_version = (onnx_model.opset_import().size() ? onnx_model.opset_import(0).version() : 0); LOG_INFO("----------------------------------------------------------------"); @@ -737,4 +782,10 @@ bool ModelImporter::parseFromFile(char const* onnxModelFile, int32_t verbosity) return true; } +char const* const* ModelImporter::getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept +{ + nbPluginLibs = mPluginLibraryListCStr.size(); + return (nbPluginLibs > 0) ? mPluginLibraryListCStr.data() : nullptr; +} + } // namespace onnx2trt diff --git a/ModelImporter.hpp b/ModelImporter.hpp index c2647307..b1c91bd2 100644 --- a/ModelImporter.hpp +++ b/ModelImporter.hpp @@ -23,15 +23,19 @@ class ModelImporter : public nvonnxparser::IParser virtual Status importModel(::ONNX_NAMESPACE::ModelProto const& model); private: - ImporterContext _importer_ctx; - std::list<::ONNX_NAMESPACE::ModelProto> _onnx_models; // Needed for ownership of weights - int _current_node; - std::vector _errors; + ImporterContext mImporterCtx; + std::vector mPluginLibraryList; // Array of strings containing plugin libs + std::vector + mPluginLibraryListCStr; // Array of C-strings corresponding to the strings in mPluginLibraryList + std::list<::ONNX_NAMESPACE::ModelProto> mONNXModels; // Needed for ownership of weights + int mCurrentNode; + std::vector mErrors; + nvonnxparser::OnnxParserFlags mOnnxParserFlags{0}; public: ModelImporter(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger) : _op_importers(getBuiltinOpImporterMap()) - , _importer_ctx(network, logger) + , mImporterCtx(network, logger) { } bool parseWithWeightDescriptors(void const* serialized_onnx_model, size_t serialized_onnx_model_size) override; @@ -40,26 +44,53 @@ class ModelImporter : public nvonnxparser::IParser SubGraphCollection_t& sub_graph_collection, const char* model_path = nullptr) override; bool supportsOperator(const char* op_name) const override; + + void setFlags(nvonnxparser::OnnxParserFlags onnxParserFlags) noexcept override + { + mOnnxParserFlags = onnxParserFlags; + } + nvonnxparser::OnnxParserFlags getFlags() const noexcept override + { + return mOnnxParserFlags; + } + + void clearFlag(nvonnxparser::OnnxParserFlag onnxParserFlag) noexcept override + { + mOnnxParserFlags &= ~(1U << static_cast(onnxParserFlag)); + } + + void setFlag(nvonnxparser::OnnxParserFlag onnxParserFlag) noexcept override + { + mOnnxParserFlags |= 1U << static_cast(onnxParserFlag); + } + + bool getFlag(nvonnxparser::OnnxParserFlag onnxParserFlag) const noexcept override + { + 
auto flag = 1U << static_cast(onnxParserFlag); + return static_cast(mOnnxParserFlags & flag); + } + void destroy() override { delete this; } int32_t getNbErrors() const override { - return _errors.size(); + return mErrors.size(); } nvonnxparser::IParserError const* getError(int32_t index) const override { - assert(0 <= index && index < (int32_t) _errors.size()); - return &_errors[index]; + assert(0 <= index && index < (int32_t) mErrors.size()); + return &mErrors[index]; } void clearErrors() override { - _errors.clear(); + mErrors.clear(); } - //...LG: Move the implementation to .cpp bool parseFromFile(char const* onnxModelFile, int32_t verbosity) override; + + virtual char const* const* getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept override; }; } // namespace onnx2trt diff --git a/NvOnnxParser.h b/NvOnnxParser.h index 132c12d9..8913ccf1 100644 --- a/NvOnnxParser.h +++ b/NvOnnxParser.h @@ -19,8 +19,10 @@ #define NV_ONNX_PARSER_MINOR 1 #define NV_ONNX_PARSER_PATCH 0 -static const int NV_ONNX_PARSER_VERSION = ((NV_ONNX_PARSER_MAJOR * 10000) + (NV_ONNX_PARSER_MINOR * 100) + NV_ONNX_PARSER_PATCH); +static constexpr int32_t NV_ONNX_PARSER_VERSION + = ((NV_ONNX_PARSER_MAJOR * 10000) + (NV_ONNX_PARSER_MINOR * 100) + NV_ONNX_PARSER_PATCH); +//! //! \typedef SubGraph_t //! //! \brief The data structure containing the parsing capability of @@ -28,6 +30,7 @@ static const int NV_ONNX_PARSER_VERSION = ((NV_ONNX_PARSER_MAJOR * 10000) + (NV_ //! typedef std::pair, bool> SubGraph_t; +//! //! \typedef SubGraphCollection_t //! //! \brief The data structure containing all SubGraph_t partitioned @@ -44,12 +47,13 @@ namespace nvonnxparser { template -inline int32_t EnumMax(); +constexpr inline int32_t EnumMax(); -/** \enum ErrorCode - * - * \brief the type of parser error - */ +//! +//! \enum ErrorCode +//! +//! \brief The type of error that the parser may return +//! enum class ErrorCode : int { kSUCCESS = 0, @@ -63,140 +67,256 @@ enum class ErrorCode : int kUNSUPPORTED_NODE = 8 }; +//! +//! Maximum number of flags in the ErrorCode enum. +//! +//! \see ErrorCode +//! template <> -inline int32_t EnumMax() +constexpr inline int32_t EnumMax() { return 9; } -/** \class IParserError - * - * \brief an object containing information about an error - */ +//! +//! \brief Represents one or more OnnxParserFlag values using binary OR +//! operations, e.g., 1U << OnnxParserFlag::kVERSION_COMPATIBLE +//! +//! \see IParser::setFlags() and IParser::getFlags() +//! +using OnnxParserFlags = uint32_t; + +enum class OnnxParserFlag : int32_t +{ + //! Parse the ONNX model into the INetworkDefinition with the intention of building a version-compatible engine in + //! TensorRT 8.6. This flag is planned to be deprecated in TensorRT 8.7, and removed in TensorRT 9.0. This will + //! choose TensorRT's native InstanceNormalization implementation over the plugin implementation. There may be + //! performance degradations when this flag is enabled. + kVERSION_COMPATIBLE = 0 +}; + +//! +//! Maximum number of flags in the OnnxParserFlag enum. +//! +//! \see OnnxParserFlag +//! +template <> +constexpr inline int32_t EnumMax() +{ + return 1; +} + +//! +//! \class IParserError +//! +//! \brief an object containing information about an error +//! class IParserError { public: - /** \brief the error code - */ + //! + //!\brief the error code + //! virtual ErrorCode code() const = 0; - /** \brief description of the error - */ + //! + //!\brief description of the error + //! 
virtual const char* desc() const = 0; - /** \brief source file in which the error occurred - */ + //! + //!\brief source file in which the error occurred + //! virtual const char* file() const = 0; - /** \brief source line at which the error occurred - */ + //! + //!\brief source line at which the error occurred + //! virtual int line() const = 0; - /** \brief source function in which the error occurred - */ + //! + //!\brief source function in which the error occurred + //! virtual const char* func() const = 0; - /** \brief index of the ONNX model node in which the error occurred - */ + //! + //!\brief index of the ONNX model node in which the error occurred + //! virtual int node() const = 0; protected: virtual ~IParserError() {} }; -/** \class IParser - * - * \brief an object for parsing ONNX models into a TensorRT network definition - */ +//! +//! \class IParser +//! +//! \brief an object for parsing ONNX models into a TensorRT network definition +//! class IParser { public: - /** \brief Parse a serialized ONNX model into the TensorRT network. - * This method has very limited diagnostics. If parsing the serialized model - * fails for any reason (e.g. unsupported IR version, unsupported opset, etc.) - * it the user responsibility to intercept and report the error. - * To obtain a better diagnostic, use the parseFromFile method below. - * - * \param serialized_onnx_model Pointer to the serialized ONNX model - * \param serialized_onnx_model_size Size of the serialized ONNX model - * in bytes - * \param model_path Absolute path to the model file for loading external weights if required - * \return true if the model was parsed successfully - * \see getNbErrors() getError() - */ - virtual bool parse(void const* serialized_onnx_model, - size_t serialized_onnx_model_size, - const char* model_path = nullptr) + //! + //! \brief Parse a serialized ONNX model into the TensorRT network. + //! This method has very limited diagnostics. If parsing the serialized model + //! fails for any reason (e.g. unsupported IR version, unsupported opset, etc.) + //! it is the user's responsibility to intercept and report the error. + //! To obtain a better diagnostic, use the parseFromFile method below. + //! + //! \param serialized_onnx_model Pointer to the serialized ONNX model + //! \param serialized_onnx_model_size Size of the serialized ONNX model + //! in bytes + //! \param model_path Absolute path to the model file for loading external weights if required + //! \return true if the model was parsed successfully + //! \see getNbErrors() getError() + //! + virtual bool parse( + void const* serialized_onnx_model, size_t serialized_onnx_model_size, const char* model_path = nullptr) + = 0; - /** \brief Parse an onnx model file, which can be a binary protobuf or a text onnx model - * calls parse method inside. - * - * \param File name - * \param Verbosity Level - * - * \return true if the model was parsed successfully - * - */ + //! + //! \brief Parse an onnx model file, which can be a binary protobuf or a text onnx model; + //! calls the parse method internally. + //! + //! \param onnxModelFile Name of the ONNX model file + //! \param verbosity Verbosity level + //! + //! \return true if the model was parsed successfully + //! + //! virtual bool parseFromFile(const char* onnxModelFile, int verbosity) = 0; - /** \brief Check whether TensorRT supports a particular ONNX model. - * If the function returns True, one can proceed to engine building - * without having to call \p parse or \p parseFromFile.
- * - * \param serialized_onnx_model Pointer to the serialized ONNX model - * \param serialized_onnx_model_size Size of the serialized ONNX model - * in bytes - * \param sub_graph_collection Container to hold supported subgraphs - * \param model_path Absolute path to the model file for loading external weights if required - * \return true if the model is supported - */ - virtual bool supportsModel(void const* serialized_onnx_model, - size_t serialized_onnx_model_size, - SubGraphCollection_t& sub_graph_collection, - const char* model_path = nullptr) + //! + //!\brief Check whether TensorRT supports a particular ONNX model. + //! If the function returns True, one can proceed to engine building + //! without having to call \p parse or \p parseFromFile. + //! + //! \param serialized_onnx_model Pointer to the serialized ONNX model + //! \param serialized_onnx_model_size Size of the serialized ONNX model + //! in bytes + //! \param sub_graph_collection Container to hold supported subgraphs + //! \param model_path Absolute path to the model file for loading external weights if required + //! \return true if the model is supported + //! + virtual bool supportsModel(void const* serialized_onnx_model, size_t serialized_onnx_model_size, + SubGraphCollection_t& sub_graph_collection, const char* model_path = nullptr) = 0; - /** \brief Parse a serialized ONNX model into the TensorRT network - * with consideration of user provided weights - * - * \param serialized_onnx_model Pointer to the serialized ONNX model - * \param serialized_onnx_model_size Size of the serialized ONNX model - * in bytes - * \return true if the model was parsed successfully - * \see getNbErrors() getError() - */ - virtual bool parseWithWeightDescriptors( - void const* serialized_onnx_model, size_t serialized_onnx_model_size) - = 0; + //! + //!\brief Parse a serialized ONNX model into the TensorRT network + //! with consideration of user provided weights + //! + //! \param serialized_onnx_model Pointer to the serialized ONNX model + //! \param serialized_onnx_model_size Size of the serialized ONNX model + //! in bytes + //! \return true if the model was parsed successfully + //! \see getNbErrors() getError() + //! + virtual bool parseWithWeightDescriptors(void const* serialized_onnx_model, size_t serialized_onnx_model_size) = 0; - /** \brief Returns whether the specified operator may be supported by the - * parser. - * - * Note that a result of true does not guarantee that the operator will be - * supported in all cases (i.e., this function may return false-positives). - * - * \param op_name The name of the ONNX operator to check for support - */ + //! + //!\brief Returns whether the specified operator may be supported by the + //! parser. + //! + //! Note that a result of true does not guarantee that the operator will be + //! supported in all cases (i.e., this function may return false-positives). + //! + //! \param op_name The name of the ONNX operator to check for support + //! virtual bool supportsOperator(const char* op_name) const = 0; - /** \brief destroy this object - * - * \warning deprecated and planned on being removed in TensorRT 10.0 - */ + + //! + //!\brief destroy this object + //! + //! \warning deprecated and planned on being removed in TensorRT 10.0 + //! TRT_DEPRECATED virtual void destroy() = 0; - /** \brief Get the number of errors that occurred during prior calls to - * \p parse - * - * \see getError() clearErrors() IParserError - */ + + //! 
+ //!\brief Get the number of errors that occurred during prior calls to + //! \p parse + //! + //! \see getError() clearErrors() IParserError + //! virtual int getNbErrors() const = 0; - /** \brief Get an error that occurred during prior calls to \p parse - * - * \see getNbErrors() clearErrors() IParserError - */ + + //! + //!\brief Get an error that occurred during prior calls to \p parse + //! + //! \see getNbErrors() clearErrors() IParserError + //! virtual IParserError const* getError(int index) const = 0; - /** \brief Clear errors from prior calls to \p parse - * - * \see getNbErrors() getError() IParserError - */ + + //! + //!\brief Clear errors from prior calls to \p parse + //! + //! \see getNbErrors() getError() IParserError + //! virtual void clearErrors() = 0; + //! + //! \brief Set the parser flags. + //! + //! The flags are listed in the OnnxParserFlag enum. + //! + //! \param OnnxParserFlag The flags used when parsing an ONNX model. + //! + //! \note This function will override the previous set flags, rather than bitwise ORing the new flag. + //! + //! \see getFlags() + //! + virtual void setFlags(OnnxParserFlags onnxParserFlags) noexcept = 0; + + //! + //! \brief Get the parser flags. Defaults to 0. + //! + //! \return The parser flags as a bitmask. + //! + //! \see setFlags() + //! + virtual OnnxParserFlags getFlags() const noexcept = 0; + + //! + //! \brief clear a parser flag. + //! + //! clears the parser flag from the enabled flags. + //! + //! \see setFlags() + //! + virtual void clearFlag(OnnxParserFlag onnxParserFlag) noexcept = 0; + + //! + //! \brief Set a single parser flag. + //! + //! Add the input parser flag to the already enabled flags. + //! + //! \see setFlags() + //! + virtual void setFlag(OnnxParserFlag onnxParserFlag) noexcept = 0; + + //! + //! \brief Returns true if the parser flag is set + //! + //! \see getFlags() + //! + //! \return True if flag is set, false if unset. + //! + virtual bool getFlag(OnnxParserFlag onnxParserFlag) const noexcept = 0; + virtual ~IParser() noexcept = default; + + //! + //! \brief Query the plugin libraries needed to implement operations used by the parser in a version-compatible + //! engine. + //! + //! This provides a list of plugin libraries on the filesystem needed to implement operations + //! in the parsed network. If you are building a version-compatible engine using this network, + //! provide this list to IBuilderConfig::setPluginsToSerialize to serialize these plugins along + //! with the version-compatible engine, or, if you want to ship these plugin libraries externally + //! to the engine, ensure that IPluginRegistry::loadLibrary is used to load these libraries in the + //! appropriate runtime before deserializing the corresponding engine. + //! + //! \param[out] nbPluginLibs Returns the number of plugin libraries in the array, or -1 if there was an error. + //! \return Array of `nbPluginLibs` C-strings describing plugin library paths on the filesystem if nbPluginLibs > 0, + //! or nullptr otherwise. This array is owned by the IParser, and the pointers in the array are only valid until + //! the next call to parse(), supportsModel(), parseFromFile(), or parseWithWeightDescriptors(). + //! 
+ virtual char const* const* getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept = 0; }; } // namespace nvonnxparser @@ -210,20 +330,21 @@ namespace nvonnxparser namespace { -/** \brief Create a new parser object - * - * \param network The network definition that the parser will write to - * \param logger The logger to use - * \return a new parser object or NULL if an error occurred - * - * Any input dimensions that are constant should not be changed after parsing, - * because correctness of the translation may rely on those constants. - * Changing a dynamic input dimension, i.e. one that translates to -1 in - * TensorRT, to a constant is okay if the constant is consistent with the model. - * Each instance of the parser is designed to only parse one ONNX model once. - * - * \see IParser - */ +//! +//! \brief Create a new parser object +//! +//! \param network The network definition that the parser will write to +//! \param logger The logger to use +//! \return a new parser object or NULL if an error occurred +//! +//! Any input dimensions that are constant should not be changed after parsing, +//! because correctness of the translation may rely on those constants. +//! Changing a dynamic input dimension, i.e. one that translates to -1 in +//! TensorRT, to a constant is okay if the constant is consistent with the model. +//! Each instance of the parser is designed to only parse one ONNX model once. +//! +//! \see IParser +//! inline IParser* createParser(nvinfer1::INetworkDefinition& network, nvinfer1::ILogger& logger) { return static_cast(createNvOnnxParser_INTERNAL(&network, &logger, NV_ONNX_PARSER_VERSION)); diff --git a/README.md b/README.md index 5364e31e..952b789e 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia. ## Supported TensorRT Versions -Development on the `main` branch is for the latest version of [TensorRT 8.5.1](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support. +Development on the `main` branch is for the latest version of [TensorRT 8.6.0](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support. For previous versions of TensorRT, refer to their respective branches. @@ -48,8 +48,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs ### Dependencies - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases) - - [TensorRT 8.5.1](https://developer.nvidia.com/tensorrt) - - [TensorRT 8.5.1 open source libaries (main branch)](https://github.com/NVIDIA/TensorRT/) + - [TensorRT 8.6.0](https://developer.nvidia.com/tensorrt) + - [TensorRT 8.6.0 open source libraries (main branch)](https://github.com/NVIDIA/TensorRT/) ### Building @@ -92,7 +92,7 @@ Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` python3 -m pip install /python/tensorrt-8.x.x.x-cp-none-linux_x86_64.whl -TensorRT 8.5.1 supports ONNX release 1.12.0. +TensorRT 8.6.0 supports ONNX release 1.12.0.
Install it with: python3 -m pip install onnx==1.12.0 diff --git a/TensorOrWeights.hpp b/TensorOrWeights.hpp index 75cfd8c1..032515b8 100644 --- a/TensorOrWeights.hpp +++ b/TensorOrWeights.hpp @@ -107,15 +107,15 @@ class TensorOrWeights { if (is_tensor()) { - switch(_tensor->getType()) + switch (_tensor->getType()) { - case nvinfer1::DataType::kFLOAT:return "FLOAT"; - case nvinfer1::DataType::kHALF: return "HALF"; - case nvinfer1::DataType::kINT8: return "INT8"; - case nvinfer1::DataType::kUINT8: return "UINT8"; - case nvinfer1::DataType::kINT32: return "INT32"; - case nvinfer1::DataType::kBOOL: return "BOOL"; - default: return "UNKNOWN TYPE"; + case nvinfer1::DataType::kFLOAT:return "FLOAT"; + case nvinfer1::DataType::kHALF: return "HALF"; + case nvinfer1::DataType::kINT8: return "INT8"; + case nvinfer1::DataType::kUINT8: return "UINT8"; + case nvinfer1::DataType::kINT32: return "INT32"; + case nvinfer1::DataType::kBOOL: return "BOOL"; + case nvinfer1::DataType::kFP8: return "FP8"; } } else @@ -130,9 +130,9 @@ class TensorOrWeights case ::ONNX_NAMESPACE::TensorProto::BOOL: return "BOOL"; case ::ONNX_NAMESPACE::TensorProto::INT32: return "INT32"; case ::ONNX_NAMESPACE::TensorProto::INT64: return "INT32"; - default: return "UNKNOWN TYPE"; } } + return "UNKNOWN TYPE"; } }; diff --git a/builtin_op_importers.cpp b/builtin_op_importers.cpp index c92edb72..6a3bb3bc 100644 --- a/builtin_op_importers.cpp +++ b/builtin_op_importers.cpp @@ -2,25 +2,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -// Include assert.h prior to the below WAR to ensure that assert() -// definition obeys the NDEBUG define -#include - -// Workaround for NDEBUG causing functional -// differences in ONNX / protobuf code on aarch64 platforms which leads -// to corruption. -#if defined(__aarch64__) && defined(__linux__) && defined(NDEBUG) -#undef NDEBUG -#define REDEFINE_NDEBUG -#endif - -#include - -#if defined(REDEFINE_NDEBUG) -#define NDEBUG -#undef REDEFINE_NDEBUG -#endif - +#include "builtin_op_importers.hpp" #include "ConditionalHelpers.hpp" #include "LoopHelpers.hpp" #include "ModelImporter.hpp" @@ -30,7 +12,6 @@ #include "OnnxAttrs.hpp" #include "RNNHelpers.hpp" #include "ShapeTensor.hpp" -#include "builtin_op_importers.hpp" #include "half.h" #include "onnx2trt_utils.hpp" @@ -89,7 +70,7 @@ using nvinfer1::DataType; do \ { \ nvinfer1::ILayer* layer_ptr = layer; \ - ASSERT(layer_ptr && "Input layer is null.", ErrorCode::kUNSUPPORTED_NODE); \ + ASSERT(layer_ptr && "Input layer is null.", ErrorCode::kUNSUPPORTED_NODE); \ return {{layer_ptr->getOutput(0)}}; \ } while (0) @@ -97,7 +78,7 @@ using nvinfer1::DataType; do \ { \ TensorOrWeights output = identity(ctx, input); \ - ASSERT(output && "Failed to add an identity layer.", ErrorCode::kUNSUPPORTED_NODE); \ + ASSERT(output && "Failed to add an identity layer.", ErrorCode::kUNSUPPORTED_NODE); \ return {{output}}; \ } while (0) @@ -240,7 +221,7 @@ NodeImportResult batchnormFallback( ->getOutput(0), *bias, eOp::kSUM); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -307,26 +288,15 @@ DEFINE_BUILTIN_OP_IMPORTER(Cast) nvinfer1::ITensor& tensor = convertToTensor(inputs.at(0), ctx); OnnxAttrs attrs(node, ctx); // Get data type to cast to. 
- DataType dtype = tensor.getType(); auto onnxType = attrs.get("to"); DataType newType{DataType::kFLOAT}; ASSERT( convertDtype(onnxType, &newType) && "Unsupported data type for the Cast operator!", ErrorCode::kINVALID_NODE); LOG_VERBOSE("Casting to type: " << newType); - // UINT8 type casting is only supported from FP types. - if (dtype == DataType::kUINT8) - { - ASSERT(newType == DataType::kFLOAT || newType == DataType::kHALF, ErrorCode::kUNSUPPORTED_NODE); - } - if (newType == DataType::kUINT8) - { - ASSERT(dtype == DataType::kFLOAT || dtype == DataType::kHALF, ErrorCode::kUNSUPPORTED_NODE); - } // Add the layer. - nvinfer1::IIdentityLayer* layer = ctx->network()->addIdentity(tensor); - layer->setOutputType(0, newType); - ctx->registerLayer(layer, getNodeName(node)); + nvinfer1::ICastLayer* layer = ctx->network()->addCast(tensor, newType); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -385,13 +355,13 @@ DEFINE_BUILTIN_OP_IMPORTER(Celu) ASSERT(elementwiseCheck(tempInputs, eOp::kDIV) && "Elementwise layer does not support the given inputs and operator.", ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* combined = inputTensors.at(0); auto* divLayer = ctx->network()->addElementWise(*combined, *inputTensors.at(3), eOp::kDIV); - ctx->registerLayer(divLayer, getNodeName(node)); + ctx->registerLayer(divLayer, node); ASSERT(divLayer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); combined = divLayer->getOutput(0); // Calculate exp(x/alpha) -> 4 nvinfer1::IUnaryLayer* uLayer = ctx->network()->addUnary(*combined, uOp::kEXP); - ctx->registerLayer(uLayer, getNodeName(node)); + ctx->registerLayer(uLayer, node); combined = uLayer->getOutput(0); inputTensors.push_back(combined); @@ -421,7 +391,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Celu) && "The number of dimensions should remain the same adding inputs.", ErrorCode::kUNSUPPORTED_NODE); auto* layer = ctx->network()->addElementWise(*firstTensor, *secondTensor, op); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); inputTensors.push_back(layer->getOutput(0)); } @@ -580,7 +550,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Concat) int32_t nbDims = inputs.at(0).shape().nbDims; CHECK(convertAxis(axis, nbDims)); auto* layer = ctx->network()->addConcatenation(tensors.data(), tensors.size()); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); layer->setAxis(axis); RETURN_FIRST_OUTPUT(layer); @@ -754,7 +724,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Conv) ErrorCode::kINVALID_NODE); layer->setNbGroups(ngroup); // Register layer name as well as kernel weights and bias weights (if any) - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ctx->network()->setWeightsName(kernelWeights, inputs.at(1).weights().getName()); if (inputs.size() == 3) { @@ -774,6 +744,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Conv) LOG_VERBOSE("Using kernel: " << kernelSize << ", strides: " << strides << ", prepadding: " << begPadding << ", postpadding: " << endPadding << ", dilations: " << dilations << ", numOutputs: " << noutput); LOG_VERBOSE("Convolution output dimensions: " << dims); + return {{tensorPtr}}; } @@ -966,7 +937,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) << "Post-padding: " << endPadding); // Register layer, along with refittable kernel weights and bias weights (if any) - ctx->registerLayer(layer, getNodeName(node)); + 
ctx->registerLayer(layer, node); tensorPtr = layer->getOutput(0); dims = tensorPtr->getDimensions(); @@ -1025,6 +996,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConvTranspose) tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); ASSERT(tensorPtr && "Failed to squeeze tensor.", ErrorCode::kUNSUPPORTED_NODE); } + return {{tensorPtr}}; } @@ -1067,7 +1039,8 @@ DEFINE_BUILTIN_OP_IMPORTER(CumSum) if (exclusive) { auto zero = createZeroTensor(ctx, inputSliced); - std::vector concatTensors = reverse == 1 ? std::vector{input, zero} : std::vector{zero, input}; + std::vector concatTensors = reverse == 1 ? std::vector{input, zero} + : std::vector{zero, input}; auto concat = ctx->network()->addConcatenation(concatTensors.data(), concatTensors.size()); concat->setAxis(axis); @@ -1169,7 +1142,7 @@ DEFINE_BUILTIN_OP_IMPORTER(DepthToSpace) auto* firstShuffle = addShuffle(ctx, *tensorPtr, firstShape); firstShuffle->setSecondTranspose(perm); - ctx->registerLayer(firstShuffle, getNodeName(node)); + ctx->registerLayer(firstShuffle, node); tensorPtr = firstShuffle->getOutput(0); // Finally reshape to {N, C / (blockSize * blockSize), H * blockSize, W * blockSize}; @@ -1182,7 +1155,7 @@ DEFINE_BUILTIN_OP_IMPORTER(DepthToSpace) // This is a helper function for QuantizeLinear/DequantizeLinear NodeImportResult QuantDequantLinearHelper( - IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs, bool isDQ) + IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs, bool isDQ, DataType datatype) { CHECK(notInvalidType(inputs.at(0), {"UINT8"})); @@ -1192,9 +1165,9 @@ NodeImportResult QuantDequantLinearHelper( CHECK(notInvalidType(inputs.at(2), {"UINT8"})); } auto addConstantLayer - = [ctx](nvinfer1::INetworkDefinition& network, ShapedWeights const& weights) -> nvinfer1::ITensor* { + = [ctx, node](nvinfer1::INetworkDefinition& network, ShapedWeights const& weights) -> nvinfer1::ITensor* { nvinfer1::IConstantLayer* constLayer = network.addConstant(weights.shape, weights); - ctx->registerLayer(constLayer, weights.getName()); + ctx->registerLayer(constLayer, weights.getName(), &node); network.setWeightsName(weights, weights.getName()); return constLayer->getOutput(0); }; @@ -1203,7 +1176,9 @@ NodeImportResult QuantDequantLinearHelper( return inputs.at(i).is_weights() && (ctx->getConstantLayer(inputs.at(i).weights().getName()) == nullptr); }; - ASSERT((inputs.size() == 3) && "This version of TensorRT requires 3 inputs for the DequantizeLinear operator.", + // Read the optional quantization axis attribute. Set it to the rank of the input tensor if not provided + ASSERT((inputs.size() >= 2) + && "This version of TensorRT requires at least 2 inputs for the QuantizeLinear/DequantizeLinear operator.", nvonnxparser::ErrorCode::kINVALID_NODE); std::string nodeName = getNodeName(node); @@ -1233,27 +1208,32 @@ NodeImportResult QuantDequantLinearHelper( // Input 2 initializes the layer's zero-point. nvinfer1::ITensor* zeroPointInput = nullptr; - if (newConstantInput(2)) + bool isFP8 = datatype == DataType::kFP8; + if (!isFP8) { - // Zero-point verification. - auto zeroPoint = inputs.at(2).weights(); - ASSERT(shiftIsAllZeros(zeroPoint) && "TRT only supports symmetric quantization - zeroPt must be all zeros", + if (newConstantInput(2)) + { + ASSERT((inputs.size() == 3) && "This version of TensorRT requires 3 inputs for the INT8 QuantizeLinear/DequantizeLinear operator.", + nvonnxparser::ErrorCode::kINVALID_NODE); + // Zero-point verification. 
+ auto zeroPoint = inputs.at(2).weights(); + ASSERT(shiftIsAllZeros(zeroPoint) && "TensorRT only supports symmetric quantization. The zero point for the QuantizeLinear/DequantizeLinear operator must be all zeros.", + nvonnxparser::ErrorCode::kINVALID_NODE); + // Convert the zero-point to float because TRT uses float for zero-point. + auto fpZeroPoint = createZeroShifts(zeroPoint, ::ONNX_NAMESPACE::TensorProto::FLOAT, ctx); + fpZeroPoint.setName(zeroPoint.getName()); + zeroPointInput = addConstantLayer(*ctx->network(), fpZeroPoint); + } + else + { + zeroPointInput = &convertToTensor(inputs.at(2), ctx); + } + auto const zeroPointSize = volume(zeroPointInput->getDimensions()); + // ONNX may represent a scalar using either 0-D or 1-D, so compare sizes instead of shapes. + ASSERT(zeroPointSize == scaleSize && "The scale and zero point must have the same volume.", nvonnxparser::ErrorCode::kINVALID_NODE); - // Convert the zero-point to Float because that's TRT uses float for zero-point. - auto fpZeroPoint = createZeroShifts(zeroPoint, ::ONNX_NAMESPACE::TensorProto::FLOAT, ctx); - fpZeroPoint.setName(zeroPoint.getName()); - zeroPointInput = addConstantLayer(*ctx->network(), fpZeroPoint); } - else - { - zeroPointInput = &convertToTensor(inputs.at(2), ctx); - } - auto const zeroPointSize = volume(zeroPointInput->getDimensions()); - // ONNX may represent a scalar using either 0-D or 1-D, so compare sizes instead of shapes. - ASSERT(zeroPointSize == scaleSize && "The scale and zero-point must have the same size", - nvonnxparser::ErrorCode::kINVALID_NODE); - // Read the optional quantization axis attribute. Set it to the rank of the input tensor if not provided OnnxAttrs attrs(node, ctx); int32_t const nbDims = dataInput.getDimensions().nbDims; int32_t axis = attrs.get("axis", nbDims); @@ -1288,6 +1268,10 @@ NodeImportResult QuantDequantLinearHelper( ASSERT(dq && "Failed to create Dequantize layer.", ErrorCode::kUNSUPPORTED_NODE); dq->setAxis(axis); layer = dq; + if (isFP8) + { + layer->setPrecision(DataType::kFP8); + } } else { @@ -1296,18 +1280,41 @@ NodeImportResult QuantDequantLinearHelper( ASSERT(q && "Failed to create Quantize layer.", ErrorCode::kUNSUPPORTED_NODE); q->setAxis(axis); layer = q; + if (isFP8) + { + layer->setOutputType(0, DataType::kFP8); + } } layer->setName(nodeName.c_str()); - layer->setInput(2, *zeroPointInput); + if (zeroPointInput) + { + layer->setInput(2, *zeroPointInput); + } // Return layer output RETURN_FIRST_OUTPUT(layer); } + +DEFINE_BUILTIN_OP_IMPORTER(QuantizeLinear) +{ + return QuantDequantLinearHelper(ctx, node, inputs, false /*isDQ*/, DataType::kINT8); +} + DEFINE_BUILTIN_OP_IMPORTER(DequantizeLinear) { - return QuantDequantLinearHelper(ctx, node, inputs, true /*isDQ*/); + return QuantDequantLinearHelper(ctx, node, inputs, true /*isDQ*/, DataType::kINT8); +} + +DEFINE_BUILTIN_OP_IMPORTER(TRT_FP8QuantizeLinear) +{ + return QuantDequantLinearHelper(ctx, node, inputs, false /*isDQ*/, DataType::kFP8); +} + +DEFINE_BUILTIN_OP_IMPORTER(TRT_FP8DequantizeLinear) +{ + return QuantDequantLinearHelper(ctx, node, inputs, true /*isDQ*/, DataType::kFP8); } DECLARE_BUILTIN_OP_IMPORTER(Mul); @@ -1402,7 +1409,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Einsum) } auto nbInputs = static_cast(inputTensors.size()); nvinfer1::IEinsumLayer* einsumLayer = ctx->network()->addEinsum(inputTensors.data(), nbInputs, equation.c_str()); - ctx->registerLayer(einsumLayer, getNodeName(node)); + ctx->registerLayer(einsumLayer, node); RETURN_FIRST_OUTPUT(einsumLayer); } @@ -1458,7 +1465,7 @@ 
DEFINE_BUILTIN_OP_IMPORTER(Expand) ShapeTensor const strides = min(ctx, one, sub(ctx, newDims, one)); nvinfer1::ISliceLayer* sliceLayer = addSlice(ctx, newInputTensor, starts, sizes, strides); - ctx->registerLayer(sliceLayer, getNodeName(node)); + ctx->registerLayer(sliceLayer, node); RETURN_FIRST_OUTPUT(sliceLayer); } @@ -1504,7 +1511,7 @@ DEFINE_BUILTIN_OP_IMPORTER(EyeLike) std::memcpy(tempWeights.values, values.data(), values.size() * sizeof(int)); auto* layer = ctx->network()->addConstant(dims, tempWeights); layer->setOutputType(0, DataType::kINT32); - ctx->registerLayer(layer, node.name()); + ctx->registerLayer(layer, node); if (dtype != DataType::kINT32) { @@ -1550,7 +1557,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Gather) CHECK(convertAxis(axis, nbDims)); LOG_VERBOSE("Using Gather axis: " << axis); auto* layer = ctx->network()->addGather(data, indices, axis); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -1569,7 +1576,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GatherElements) auto* layer = ctx->network()->addGatherV2(data, indices, nvinfer1::GatherMode::kELEMENT); layer->setGatherAxis(axis); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -1584,7 +1591,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GatherND) auto* layer = ctx->network()->addGatherV2(data, indices, nvinfer1::GatherMode::kND); layer->setNbElementWiseDims(nbElementWiseDims); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -1620,7 +1627,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Gemm) LOG_VERBOSE("Using opA: " << static_cast(opA) << " opB: " << static_cast(opB)); nvinfer1::IMatrixMultiplyLayer* matmul = ctx->network()->addMatrixMultiply(inputA, opA, inputB, opB); - ctx->registerLayer(matmul, getNodeName(node)); + ctx->registerLayer(matmul, node); + nvinfer1::ITensor* matmulTensor = matmul->getOutput(0); // Scale A*B if needed. @@ -1708,6 +1716,11 @@ DEFINE_BUILTIN_OP_IMPORTER(GreaterOrEqual) /*greater*/ true)}}; } +DEFINE_BUILTIN_OP_IMPORTER(GroupNormalization) +{ + return normalizationHelper(ctx, node, inputs); +} + // singlePassShape is the shape of the output from a single pass. nvinfer1::ITensor* concatenateRNNOutputs(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* singlePassShape, nvinfer1::ITensor* sequenceLength, @@ -1951,7 +1964,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU) LOG_VERBOSE("Initial hidden state shape: " << initialHidden->getDimensions()); nvinfer1::IRecurrenceLayer* Ht1 = loop->addRecurrence(*initialHidden); - ctx->registerLayer(Ht1, getNodeName(node)); + ctx->registerLayer(Ht1, node); LOG_VERBOSE("Hidden state shape: " << Ht1->getOutput(0)->getDimensions()); // Compute stackedZR(t) = f(X(t) * W[zr]^T + H(t-1) * R[zr]^T + (Wb[zr] + Rb[zr])). stackedZR(t) has shape @@ -2103,7 +2116,7 @@ DEFINE_BUILTIN_OP_IMPORTER(HardSigmoid) DEFINE_BUILTIN_OP_IMPORTER(Identity) { auto* layer = ctx->network()->addIdentity(convertToTensor(inputs.at(0), ctx)); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -2185,66 +2198,60 @@ DEFINE_BUILTIN_OP_IMPORTER(ImageScaler) // Finally add the scale layer. 
auto layer = ctx->network()->addScale( tensor, nvinfer1::ScaleMode::kCHANNEL, shiftWeights, scaleWeights, nvinfer1::Weights{}); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } DEFINE_BUILTIN_OP_IMPORTER(InstanceNormalization) { - // Scales and biases must be initializers - ASSERT(inputs.at(1).is_weights() && "The scale tensor is required to be an initializer.", - ErrorCode::kUNSUPPORTED_NODE); - ASSERT( - inputs.at(2).is_weights() && "The bias tensor is required to be an initializer.", ErrorCode::kUNSUPPORTED_NODE); - nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); - int32_t nbDims = tensorPtr->getDimensions().nbDims; - ASSERT(nbDims >= 3 && nbDims <= 5 && "TensorRT only supports InstanceNormalization on 3D, 4D, or 5D tensors!", - ErrorCode::kUNSUPPORTED_NODE); - - const bool needToExpandDims = (nbDims == 3); - if (needToExpandDims) + // Choose plugin implementation for non-VC engines, and native implementation + // for VC engines. + auto flags = ctx->getFlags(); + uint32_t vcFlag = 1U << static_cast(nvonnxparser::OnnxParserFlag::kVERSION_COMPATIBLE); + if (flags & vcFlag) { - // Expand spatial dims from 1D to 2D - std::vector const axes{3}; - tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); - ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + return normalizationHelper(ctx, node, inputs); } - auto scale_weights = inputs.at(1).weights(); - auto bias_weights = inputs.at(2).weights(); - OnnxAttrs attrs(node, ctx); - float epsilon = attrs.get("epsilon", 1e-5f); - int32_t const relu{0}; // the ONNX instance norm op does not use the relu parameter - float const alpha{0.f}; // the ONNX instance norm op does not use the alpha parameter - - // Populate instanceNormalization plugin properties. 
- std::string const pluginName = "InstanceNormalization_TRT"; - std::string const pluginVersion = "1"; - std::vector f; - f.emplace_back("epsilon", &epsilon, nvinfer1::PluginFieldType::kFLOAT32, 1); - f.emplace_back("scales", scale_weights.values, nvinfer1::PluginFieldType::kFLOAT32, scale_weights.count()); - f.emplace_back("bias", bias_weights.values, nvinfer1::PluginFieldType::kFLOAT32, bias_weights.count()); - f.emplace_back("relu", &relu, nvinfer1::PluginFieldType::kINT32, 1); - f.emplace_back("alpha", &alpha, nvinfer1::PluginFieldType::kFLOAT32, 1); + return instanceNormPluginHelper(ctx, node, inputs); +} - // Create plugin from registry - auto const plugin = createPlugin(getNodeName(node), importPluginCreator(pluginName, pluginVersion), f); +DEFINE_BUILTIN_OP_IMPORTER(IsInf) +{ + OnnxAttrs attrs{node, ctx}; + int32_t const detectNegative = attrs.get("detect_negative", 1); + int32_t const detectPositive = attrs.get("detect_positive", 1); - ASSERT(plugin != nullptr && "InstanceNormalization plugin was not found in the plugin registry!", - ErrorCode::kUNSUPPORTED_NODE); + if (detectNegative && detectPositive) + { + return unaryHelper(ctx, node, inputs.at(0), nvinfer1::UnaryOperation::kISINF); + } - auto* layer = ctx->network()->addPluginV2(&tensorPtr, 1, *plugin); - ctx->registerLayer(layer, getNodeName(node)); - tensorPtr = layer->getOutput(0); + auto& input = convertToTensor(inputs.at(0), ctx); + auto inputDims = input.getDimensions(); + nvinfer1::Dims scalarDims{inputDims.nbDims}; + std::fill(scalarDims.d, scalarDims.d + scalarDims.nbDims, 1); + auto& zeroTensor = *addConstantScalar(ctx, 0.F, ::ONNX_NAMESPACE::TensorProto::FLOAT, scalarDims)->getOutput(0); - if (needToExpandDims) + if (detectNegative) { - // Un-expand spatial dims back to 1D - std::vector const axes{3}; - tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); - ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE); + auto* isNeg + = ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kLESS)->getOutput(0); + auto* isInf = ctx->network()->addUnary(input, nvinfer1::UnaryOperation::kISINF)->getOutput(0); + RETURN_FIRST_OUTPUT(ctx->network()->addElementWise(*isNeg, *isInf, nvinfer1::ElementWiseOperation::kAND)); } - - return {{tensorPtr}}; + if (detectPositive) + { + auto* isPos + = ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kGREATER)->getOutput(0); + auto* isInf = ctx->network()->addUnary(input, nvinfer1::UnaryOperation::kISINF)->getOutput(0); + RETURN_FIRST_OUTPUT(ctx->network()->addElementWise(*isPos, *isInf, nvinfer1::ElementWiseOperation::kAND)); + } + // In this case, always return false. + auto* isPos + = ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kGREATER)->getOutput(0); + auto* isNeg + = ctx->network()->addElementWise(input, zeroTensor, nvinfer1::ElementWiseOperation::kLESS)->getOutput(0); + RETURN_FIRST_OUTPUT(ctx->network()->addElementWise(*isPos, *isNeg, nvinfer1::ElementWiseOperation::kAND)); } DEFINE_BUILTIN_OP_IMPORTER(IsNaN) @@ -2260,6 +2267,41 @@ DEFINE_BUILTIN_OP_IMPORTER(IsNaN) return unaryHelper(ctx, node, equalRet, nvinfer1::UnaryOperation::kNOT); } +DEFINE_BUILTIN_OP_IMPORTER(LayerNormalization) +{ + auto* input = &convertToTensor(inputs.at(0), ctx); + auto* scale = &convertToTensor(inputs.at(1), ctx); + auto biasType = input->getType() == nvinfer1::DataType::kFLOAT ? 
::ONNX_NAMESPACE::TensorProto::FLOAT + : ::ONNX_NAMESPACE::TensorProto::FLOAT16; + auto* bias + = inputs.size() == 3 ? &convertToTensor(inputs.at(2), ctx) : addConstantScalar(ctx, 0, biasType)->getOutput(0); + + OnnxAttrs attrs(node, ctx); + float epsilon = attrs.get("epsilon", 1e-5f); + int32_t axis = attrs.get("axis", -1); + nvinfer1::DataType computeType = nvinfer1::DataType::kFLOAT; + convertDtype(attrs.get("stash_type", 1), &computeType); + + int32_t const nbDims = input->getDimensions().nbDims; + CHECK(convertAxis(axis, nbDims)); + uint32_t axesMask{0}; + + // Populate axesMask with axis values + for (int32_t i = axis; i < nbDims; i++) + { + axesMask |= 1 << i; + } + + // Broadcast scale and bias to input size + broadcastTensors(ctx, input, scale); + broadcastTensors(ctx, input, bias); + + auto* layer = ctx->network()->addNormalization(*input, *scale, *bias, axesMask); + layer->setEpsilon(epsilon); + layer->setComputePrecision(computeType); + RETURN_FIRST_OUTPUT(layer); +} + DEFINE_BUILTIN_OP_IMPORTER(LeakyRelu) { OnnxAttrs attrs(node, ctx); @@ -2300,7 +2342,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) constexpr int32_t NB_NON_STATE_INPUTS = 2; // First 2 inputs are trip count and condition respectively. constexpr int32_t NB_DISCARDED_OUTPUTS = 1; // First output is the updated value of the condition, and is ignored by the outer loop node. - constexpr int32_t MAX_SCAN_OUTPUT_LENGTH = 1024; // Maximum length for scan outputs if trip count is not set. + constexpr int32_t DUMMY_SCAN_OUTPUT_LENGTH = 1; ASSERT((inputs.size() >= 2) && "The Loop operator requires at least 2 inputs.", ErrorCode::kINVALID_NODE); OnnxAttrs attrs(node, ctx); int32_t const nbInputs = node.input().size(); @@ -2330,7 +2372,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) ctx->loopTensors()[body.input(0).name()] = node.input(0); loop->addTripLimit(*tripLimit, nvinfer1::TripLimit::kCOUNT); // First graph input is iteration_num, so create a loop counter - auto counter = addLoopCounter(ctx, loop, 0); + auto counter = convertToScalar(ctx, addLoopCounter(ctx, loop, 0)); ctx->registerTensor(counter, body.input(0).name()); } } @@ -2349,6 +2391,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) stateVars.emplace_back(loop->addRecurrence(convertToTensor(inputs[i], ctx))); ctx->loopTensors()[body.input(i).name()] = node.input(i); ctx->registerTensor(TensorOrWeights{stateVars.back()->getOutput(0)}, body.input(i).name()); + LOG_VERBOSE("Mapped Loop node input " << node.input(i) << " to loop body input " << body.input(i).name()); } // Loop body @@ -2398,9 +2441,9 @@ DEFINE_BUILTIN_OP_IMPORTER(Loop) } else { - trtScanOut->setInput( - 1, *addConstantScalar(ctx, MAX_SCAN_OUTPUT_LENGTH, ::ONNX_NAMESPACE::TensorProto_DataType_INT32) - ->getOutput(0)); + trtScanOut->setInput(1, + *addConstantScalar(ctx, DUMMY_SCAN_OUTPUT_LENGTH, ::ONNX_NAMESPACE::TensorProto_DataType_INT32) + ->getOutput(0)); } nodeOutputs.emplace_back(trtScanOut->getOutput(0)); } @@ -2417,7 +2460,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LRN) float beta = attrs.get("beta", 0.75f); float bias = attrs.get("bias", 1.0f); auto* layer = ctx->network()->addLRN(tensor, size, alpha, beta, bias); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -2537,7 +2580,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM) // H(t-1) nvinfer1::IRecurrenceLayer* Ht1 = loop->addRecurrence(*initialHidden); - ctx->registerLayer(Ht1, getNodeName(node)); + ctx->registerLayer(Ht1, node); LOG_VERBOSE("Hidden state shape: " << Ht1->getOutput(0)->getDimensions()); // C(t-1) @@ -2732,46 
+2775,46 @@ DEFINE_BUILTIN_OP_IMPORTER(LpNormalization) if (p == 1) { // abs(x) nvinfer1::IUnaryLayer* absLayer = ctx->network()->addUnary(*input, uOp::kABS); - ctx->registerLayer(absLayer, getNodeName(node)); + ctx->registerLayer(absLayer, node); norm = absLayer->getOutput(0); // norm coeff = sum(abs(x)) along axis dimension nvinfer1::IReduceLayer* reduceLayer = ctx->network()->addReduce(*norm, rOp::kSUM, 1 << axis, true); - ctx->registerLayer(reduceLayer, getNodeName(node)); + ctx->registerLayer(reduceLayer, node); norm = reduceLayer->getOutput(0); } else if (p == 2) { // x^2 auto* sqrLayer = ctx->network()->addElementWise(*input, *input, eOp::kPROD); - ctx->registerLayer(sqrLayer, getNodeName(node)); + ctx->registerLayer(sqrLayer, node); norm = sqrLayer->getOutput(0); // sum(x^2) along axis dimension nvinfer1::IReduceLayer* reduceLayer = ctx->network()->addReduce(*norm, rOp::kSUM, 1 << axis, true); - ctx->registerLayer(reduceLayer, getNodeName(node)); + ctx->registerLayer(reduceLayer, node); norm = reduceLayer->getOutput(0); // norm coeff = sqrt(sum(x^2)) nvinfer1::IUnaryLayer* sqrtLayer = ctx->network()->addUnary(*norm, uOp::kSQRT); - ctx->registerLayer(sqrtLayer, getNodeName(node)); + ctx->registerLayer(sqrtLayer, node); norm = sqrtLayer->getOutput(0); } // norm coeff |= 1 (change 0s to 1s, leave all other values same) nvinfer1::IElementWiseLayer* maskLayer = ctx->network()->addElementWise(*norm, *zerosTensor, eOp::kEQUAL); - ctx->registerLayer(maskLayer, getNodeName(node)); + ctx->registerLayer(maskLayer, node); nvinfer1::ITensor* mask = maskLayer->getOutput(0); mask = castHelper(ctx, mask, dt); auto* combinedLayer = ctx->network()->addElementWise(*norm, *mask, eOp::kSUM); - ctx->registerLayer(combinedLayer, getNodeName(node)); + ctx->registerLayer(combinedLayer, node); norm = combinedLayer->getOutput(0); // x/(norm coeff) // norm tensor is broadcast along axis dimension to match shape of input auto *layer = ctx->network()->addElementWise( *input, *norm, eOp::kDIV); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); RETURN_FIRST_OUTPUT(layer); @@ -2814,12 +2857,12 @@ DEFINE_BUILTIN_OP_IMPORTER(LpPool) if (p == 1) { // x' = abs(x) nvinfer1::IUnaryLayer* absLayer = ctx->network()->addUnary(*input, uOp::kABS); - ctx->registerLayer(absLayer, getNodeName(node)); + ctx->registerLayer(absLayer, node); output = absLayer->getOutput(0); } else if (p == 2) { // x' = x^2 auto* sqrLayer = ctx->network()->addElementWise(*input, *input, eOp::kPROD); - ctx->registerLayer(sqrLayer, getNodeName(node)); + ctx->registerLayer(sqrLayer, node); output = sqrLayer->getOutput(0); } @@ -2830,19 +2873,19 @@ DEFINE_BUILTIN_OP_IMPORTER(LpPool) poolLayer->setPostPadding(endPadding); poolLayer->setStrideNd(strides); poolLayer->setAverageCountExcludesPadding(exclude_padding); - ctx->registerLayer(poolLayer, getNodeName(node)); + ctx->registerLayer(poolLayer, node); output = poolLayer->getOutput(0); // pool_sum = pool_avg(x')*kernel_size auto* correctedSumLayer = ctx->network()->addElementWise(*output, *kernelSzTensor, eOp::kPROD); - ctx->registerLayer(correctedSumLayer, getNodeName(node)); + ctx->registerLayer(correctedSumLayer, node); output = correctedSumLayer->getOutput(0); // if p == 1, output = pool_sum // if p == 2, output = sqrt(pool_sum) if (p == 2) { nvinfer1::IUnaryLayer* sqrtLayer = ctx->network()->addUnary(*output, uOp::kSQRT); - ctx->registerLayer(sqrtLayer, getNodeName(node)); + 
ctx->registerLayer(sqrtLayer, node); output = sqrtLayer->getOutput(0); } return {{output}}; @@ -2882,7 +2925,7 @@ DEFINE_BUILTIN_OP_IMPORTER(MatMul) nvinfer1::MatrixOperation opB = getMatrixOp(*inputB); nvinfer1::IMatrixMultiplyLayer* matmul = ctx->network()->addMatrixMultiply(*inputA, opA, *inputB, opB); - ctx->registerLayer(matmul, getNodeName(node)); + ctx->registerLayer(matmul, node); auto outputTensor = matmul->getOutput(0); if (needSqueezeHead) @@ -2897,6 +2940,7 @@ DEFINE_BUILTIN_OP_IMPORTER(MatMul) std::vector axes{outputTensor->getDimensions().nbDims - 1}; outputTensor = squeezeTensor(ctx, node, *outputTensor, axes); } + return {{outputTensor}}; } @@ -2965,27 +3009,27 @@ DEFINE_BUILTIN_OP_IMPORTER(MeanVarianceNormalization) // mean(x) along axes direction auto* reduceLayer = ctx->network()->addReduce(*input, rOp::kAVG, axesMask, true); - ctx->registerLayer(reduceLayer, getNodeName(node)); + ctx->registerLayer(reduceLayer, node); auto* meanX = reduceLayer->getOutput(0); // numerator: x-mean(x) auto* numSubLayer = ctx->network()->addElementWise(*input, *meanX, eOp::kSUB); - ctx->registerLayer(numSubLayer, getNodeName(node)); + ctx->registerLayer(numSubLayer, node); auto* numerator = numSubLayer->getOutput(0); // (x-mean(x))^2 auto* sqrLayer = ctx->network()->addElementWise(*numerator, *numerator, eOp::kPROD); - ctx->registerLayer(sqrLayer, getNodeName(node)); + ctx->registerLayer(sqrLayer, node); auto* sqrNumerator = sqrLayer->getOutput(0); // mean((x-mean(x))^2) auto* meanLayer = ctx->network()->addReduce(*sqrNumerator, rOp::kAVG, axesMask, true); - ctx->registerLayer(meanLayer, getNodeName(node)); + ctx->registerLayer(meanLayer, node); auto* variance = meanLayer->getOutput(0); // sqrt(mean((x-mean(x))^2)) nvinfer1::IUnaryLayer* sqrtLayer = ctx->network()->addUnary(*variance, uOp::kSQRT); - ctx->registerLayer(sqrtLayer, getNodeName(node)); + ctx->registerLayer(sqrtLayer, node); auto* stdDev = sqrtLayer->getOutput(0); // denominator: avoid division by zero @@ -2993,12 +3037,12 @@ DEFINE_BUILTIN_OP_IMPORTER(MeanVarianceNormalization) std::fill(scalarShape.d, scalarShape.d + scalarShape.nbDims, 1); auto* epsilonTensor = addConstantScalar(ctx, 1e-9f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, scalarShape)->getOutput(0); auto* addEpsLayer = ctx->network()->addElementWise(*stdDev, *epsilonTensor, eOp::kSUM); - ctx->registerLayer(addEpsLayer, getNodeName(node)); + ctx->registerLayer(addEpsLayer, node); stdDev = addEpsLayer->getOutput(0); // division numerator/standard-deviation auto* divLayer = ctx->network()->addElementWise(*numerator, *stdDev, eOp::kDIV); - ctx->registerLayer(divLayer, getNodeName(node)); + ctx->registerLayer(divLayer, node); ASSERT(divLayer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); @@ -3033,7 +3077,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Mod) // Result = input0 - (input1 * floorDiv(input0, input1)) nvinfer1::IElementWiseLayer* resultLayer = modWithIntegerInputs(ctx, input0, input1, false); - ctx->registerLayer(resultLayer, getNodeName(node)); + ctx->registerLayer(resultLayer, node); RETURN_FIRST_OUTPUT(resultLayer); } // Fmod with integer inputs @@ -3041,7 +3085,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Mod) { // Result = input0 - (input1 * Div(input0, input1)) nvinfer1::IElementWiseLayer* resultLayer = modWithIntegerInputs(ctx, input0, input1, true); - ctx->registerLayer(resultLayer, getNodeName(node)); + ctx->registerLayer(resultLayer, node); RETURN_FIRST_OUTPUT(resultLayer); } // Fmod with floating point inputs @@ -3065,7 +3109,7 @@ 
DEFINE_BUILTIN_OP_IMPORTER(Mod) // If (input0 / input1) >= 0, result = input0 - (input1 * floor(input0 / input1)) // Else result = input0 - (input1 * ceil(input0 / input1)) auto* result = ctx->network()->addSelect(*condition, *outputWithDivFloor, *outputWithDivCeil); - ctx->registerLayer(result, getNodeName(node)); + ctx->registerLayer(result, node); RETURN_FIRST_OUTPUT(result); } } @@ -3134,7 +3178,7 @@ DEFINE_BUILTIN_OP_IMPORTER(NonMaxSuppression) // Create the NMS layer auto* layer = ctx->network()->addNMS(*boxesTensorPtr, *transposedScoresTensorPtr, *maxOutputBoxesPerClassTensorPtr); ASSERT(layer != nullptr && "Failed to create NMS layer.", ErrorCode::kUNSUPPORTED_NODE); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); // Handle the optional threshold inputs if (iouThresholdTensorPtr != nullptr) @@ -3193,12 +3237,7 @@ DEFINE_BUILTIN_OP_IMPORTER(OneHot) CHECK(convertAxis(axis, nbDims+1)); auto* layer = ctx->network()->addOneHot(*indices, *values, *depth, axis); - - auto const outDims = layer->getOutput(0)->getDimensions(); - - ASSERT((outDims.d[axis] != -1) && "OneHot does not support dynamic depth input", ErrorCode::kINVALID_NODE); - - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -3376,7 +3415,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Pad) return MAKE_ERROR("Unsupported pad mode", ErrorCode::kUNSUPPORTED_NODE); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); return {{layer->getOutput(0)}}; } @@ -3402,20 +3441,15 @@ DEFINE_BUILTIN_OP_IMPORTER(PRelu) nvinfer1::ITensor* slopes = &convertToTensor(inputs.at(1), ctx); CHECK(broadcastTensors(ctx, input, slopes)); auto* layer = ctx->network()->addParametricReLU(*input, *slopes); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } -DEFINE_BUILTIN_OP_IMPORTER(QuantizeLinear) -{ - return QuantDequantLinearHelper(ctx, node, inputs, false /*isDQ*/); -} - NodeImportResult randomUniformHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, ShapeTensor const& inputShape, OnnxAttrs const& attrs, DataType const& inputDType) { auto* fillLayer = addFill(ctx, inputShape, nvinfer1::FillOperation::kRANDOM_UNIFORM); - ctx->registerLayer(fillLayer, getNodeName(node)); + ctx->registerLayer(fillLayer, node); // Set datatype of output: // RandomUniform: dype is required and defaults to 1 @@ -3478,7 +3512,7 @@ NodeImportResult randomNormalHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::Nod ShapeTensor const& inputShape, OnnxAttrs const& attrs, DataType const& inputDType) { auto* fillLayer = addFill(ctx, inputShape, nvinfer1::FillOperation::kRANDOM_NORMAL); - ctx->registerLayer(fillLayer, getNodeName(node)); + ctx->registerLayer(fillLayer, node); // Set datatype of output: // RandomNormal: dype is required and defaults to 1 @@ -3569,7 +3603,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Range) ShapeTensor const numberOfElements = max(ctx, sub(ctx, zero, quotient), zero); nvinfer1::IFillLayer* layer = addFill(ctx, convertTo1D(ctx, numberOfElements), nvinfer1::FillOperation::kLINSPACE); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); // TensorRT requires that alpha and beta both be dynamic or both be static. 
if (start.allValuesKnown() && delta.allValuesKnown()) @@ -3703,7 +3737,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Resize) ASSERT((inputRank > 0) && "The input tensor cannot be a scalar.", ErrorCode::kUNSUPPORTED_NODE); // Add resize layer nvinfer1::IResizeLayer* layer = ctx->network()->addResize(input); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); OnnxAttrs attrs(node, ctx); auto mode = attrs.get("mode", "nearest"); @@ -3916,119 +3950,38 @@ DEFINE_BUILTIN_OP_IMPORTER(Reshape) // "A dimension could also be 0, in which case the actual dimension // value is unchanged (i.e. taken from the input tensor)." nvinfer1::IShuffleLayer* layer = addShuffle(ctx, data, shape, /*zeroIsPlaceholder=*/!allowZero); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } DEFINE_BUILTIN_OP_IMPORTER(ReverseSequence) { + ASSERT((inputs.size() == 2) && "ReverseSequence expects two input tensors: input and sequence_lens", + ErrorCode::kINVALID_NODE); CHECK(notInvalidType(inputs.at(0), {"UINT8"})); - OnnxAttrs attrs{node, ctx}; - int32_t const batch_axis = attrs.get("batch_axis", 1); - nvinfer1::ITensor* input = &convertToTensor(inputs.at(0), ctx); - auto const dims = input->getDimensions(); - int32_t const rank = dims.nbDims; - // Sequence tensor: indices tensor of rank = 1 and shape = [batchsize] - nvinfer1::ITensor* sequences = &convertToTensor(inputs.at(1), ctx); - std::vector tensors; - // Determine length of batch axis - int32_t const size = isDynamic(sequences->getDimensions()) ? dims.d[batch_axis] : sequences->getDimensions().d[0]; - ASSERT(size != -1 && "This version of TensorRT does not support dynamic ReverseSequence lengths!", - ErrorCode::kUNSUPPORTED_NODE); - - for (int32_t i = 0; i < size; i++) - { - - /* Slice across each element in batch_axis - - For batch_axis = 1 - Starts = {0, i, 0, 0...} - Sizes = {D0, 1, D2, D3...} - Strides = {1, 1, 1, ...} - - For batch_axis = 0 - Starts = {i, 0, 0, 0...} - Sizes = {1, D1, D2, D3...} - Strides = {1, 1, 1, ...} - */ + nvinfer1::ITensor* sequenceLens = &convertToTensor(inputs.at(1), ctx); + auto const inputDims = input->getDimensions(); + auto const sequenceLensDims = sequenceLens->getDimensions(); + ASSERT((inputDims.nbDims >= 2) && "Rank of input must be at least two", ErrorCode::kUNSUPPORTED_NODE); + ASSERT((sequenceLensDims.nbDims == 1) && "Rank of sequence_lens must be one", ErrorCode::kUNSUPPORTED_NODE); - ShapeTensor starts = batch_axis == 0 ? concat(ctx, shapeVector(i), shapeVector(0)) - : concat(ctx, shapeVector(0), shapeVector(i)); - ShapeTensor sizes = batch_axis == 0 - ? 
concat(ctx, shapeVector(1), ShapeTensor(*getAxisLength(ctx, input, 1, {1, {1}}))) - : concat(ctx, ShapeTensor(*getAxisLength(ctx, input, 0, {1, {1}})), shapeVector(1)); - ShapeTensor strides = fillShapeVector(ctx, 1, shapeVector(rank)); - - for (int32_t j = 2; j < rank; j++) - { - starts = concat(ctx, starts, shapeVector(0)); - sizes = concat(ctx, sizes, ShapeTensor(*getAxisLength(ctx, input, j, {1, {1}}))); - } - - auto s1 = addSlice(ctx, *input, starts, sizes, strides); - nvinfer1::ITensor* data = s1->getOutput(0); - data = squeezeTensor(ctx, node, *data, {batch_axis}); - // Get sequence length for the current slice - auto seqIndex = ctx->network()->addSlice(*sequences, {1, {i}}, {1, {1}}, {1, {1}})->getOutput(0); - - // First slice = slices data[seqIndex - 1 : 0 : -1] on axis 0 - /* - Starts = {seqIndex - 1, 0, 0 ...} - Sizes = {seqIndex, D1, D2, ...} - Strides = {-1, 1, 1, ...} - */ - - int32_t sliceRank = data->getDimensions().nbDims; - starts = sub(ctx, ShapeTensor(*seqIndex), shapeVector(1)); - ShapeTensor startsFill = fillShapeVector(ctx, 0, shapeVector(sliceRank - 1)); - starts = concat(ctx, starts, startsFill); - - sizes = ShapeTensor(*seqIndex); - for (int32_t j = 1; j < sliceRank; j++) - { - sizes = concat(ctx, sizes, ShapeTensor(*getAxisLength(ctx, data, j, {1, {1}}))); - } - - strides = shapeVector(-1); - ShapeTensor stridesFill = fillShapeVector(ctx, 1, shapeVector(sliceRank - 1)); - strides = concat(ctx, strides, stridesFill); - - auto firstSlice = addSlice(ctx, *data, starts, sizes, strides); - auto slice1 = firstSlice->getOutput(0); - - // Second slice = slices data[seqIndex:end:1] on axis 0 - - /* - Starts = {seqIndex, 0, 0 ... 0} - Sizes = {D0 - seqIndex, D1, D2 ...} - Strides = {1, 1, 1, 1 ...} - */ - - starts = ShapeTensor(*seqIndex); - startsFill = fillShapeVector(ctx, 0, shapeVector(sliceRank - 1)); - starts = concat(ctx, starts, startsFill); - - sizes = sub(ctx, ShapeTensor(*getAxisLength(ctx, data, 0, {1, {1}})), ShapeTensor(*seqIndex)); - for (int32_t j = 1; j < sliceRank; j++) - { - sizes = concat(ctx, sizes, ShapeTensor(*getAxisLength(ctx, data, j, {1, {1}}))); - } - - strides = fillShapeVector(ctx, 1, shapeVector(sliceRank)); + OnnxAttrs attrs{node, ctx}; + int32_t const batchAxis = attrs.get("batch_axis", 1); + int32_t const sequenceAxis = attrs.get("time_axis", 0); + ASSERT((batchAxis >= 0 && batchAxis <= inputDims.nbDims) && "Invalid batch_axis", ErrorCode::kUNSUPPORTED_NODE); + ASSERT( + (sequenceAxis >= 0 && sequenceAxis <= inputDims.nbDims) && "Invalid time_axis", ErrorCode::kUNSUPPORTED_NODE); + ASSERT((batchAxis != sequenceAxis) && "batch_axis and time_axis cannot be the same", ErrorCode::kUNSUPPORTED_NODE); - auto secondSlice = addSlice(ctx, *data, starts, sizes, strides); - auto slice2 = secondSlice->getOutput(0); + auto layer = ctx->network()->addReverseSequence(*input, *sequenceLens); + ctx->registerLayer(layer, node); + ASSERT(layer && "Failed to add ReverseSequence layer.", ErrorCode::kUNSUPPORTED_NODE); - // Concat the two slices together - std::vector slices{slice1, slice2}; - auto fullSliceLayer = ctx->network()->addConcatenation(slices.data(), slices.size()); - tensors.emplace_back(unsqueezeTensor(ctx, node, *fullSliceLayer->getOutput(0), {batch_axis})); - } + layer->setBatchAxis(batchAxis); + layer->setSequenceAxis(sequenceAxis); - auto concatLayer = ctx->network()->addConcatenation(tensors.data(), tensors.size()); - concatLayer->setAxis(batch_axis); - RETURN_FIRST_OUTPUT(concatLayer); + RETURN_FIRST_OUTPUT(layer); } 
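The rewritten ReverseSequence importer above hands the whole computation to TensorRT's native reverse-sequence layer instead of the old per-batch slice-and-concat loop. As a reference for what that layer is expected to compute, here is a sketch of the ONNX semantics only (not TensorRT's implementation); the [T, B, D] layout and the function name are illustrative, and the case shown is time_axis = 0, batch_axis = 1:

```cpp
// Reference semantics of ONNX ReverseSequence for a [T, B, D] input with
// time_axis = 0 and batch_axis = 1: for each batch entry b, the first
// seqLens[b] time steps are reversed and the remaining steps are copied as-is.
#include <cstdint>
#include <vector>

std::vector<float> reverseSequenceReference(std::vector<float> const& in, int64_t T, int64_t B,
    int64_t D, std::vector<int64_t> const& seqLens)
{
    std::vector<float> out(in.size());
    for (int64_t b = 0; b < B; ++b)
    {
        int64_t const len = seqLens[b]; // 1 <= len <= T per the ONNX spec
        for (int64_t t = 0; t < T; ++t)
        {
            int64_t const srcT = (t < len) ? (len - 1 - t) : t;
            for (int64_t d = 0; d < D; ++d)
            {
                out[(t * B + b) * D + d] = in[(srcT * B + b) * D + d];
            }
        }
    }
    return out;
}
```

With batch_axis and time_axis swapped the indexing changes accordingly; the importer simply forwards both attributes to setBatchAxis and setSequenceAxis on the new layer.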
DEFINE_BUILTIN_OP_IMPORTER(RNN) @@ -4140,7 +4093,7 @@ DEFINE_BUILTIN_OP_IMPORTER(RNN) // H(t-1) nvinfer1::IRecurrenceLayer* hiddenState = loop->addRecurrence(*initialHidden); - ctx->registerLayer(hiddenState, getNodeName(node)); + ctx->registerLayer(hiddenState, node); LOG_VERBOSE("Hidden state shape: " << hiddenState->getOutput(0)->getDimensions()); // Compute intermediate(t) = (X(t) * W^T + H(t-1) * R^T + (Wb + Rb)). @@ -4247,14 +4200,16 @@ DEFINE_BUILTIN_OP_IMPORTER(RoiAlign) f.emplace_back("spatial_scale", &spatialScale, nvinfer1::PluginFieldType::kFLOAT32, 1); // Create plugin from registry - auto const plugin = createPlugin(getNodeName(node), importPluginCreator(pluginName, pluginVersion), f); + auto const plugin = createPlugin(getNodeName(node), importPluginCreator(ctx, pluginName, pluginVersion), f); - ASSERT(plugin != nullptr && "ROIAlign plugin was not found in the plugin registry!", - ErrorCode::kUNSUPPORTED_NODE); + ASSERT(plugin != nullptr && "ROIAlign plugin was not found in the plugin registry!", ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* const inputTensorsPtr[3] = {tensorPtr, roisPtr, batchIndicesPtr}; auto* layer = ctx->network()->addPluginV2(inputTensorsPtr, 3, *plugin); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); + + // ROIAlign requires nvinfer_vc_plugin when using VC. + ctx->addUsedVCPluginLibrary(node, pluginName.c_str(), "nvinfer_vc_plugin"); RETURN_FIRST_OUTPUT(layer); } @@ -4335,7 +4290,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Scan) stateVars.emplace_back(loop->addRecurrence(convertToTensor(inputs.at(i + opset8Offset), ctx))); ctx->registerTensor(TensorOrWeights{stateVars.back()->getOutput(0)}, body.input(i).name()); } - ctx->registerLayer(stateVars.at(0), getNodeName(node)); + ctx->registerLayer(stateVars.at(0), node); + for (int32_t i = 0; i < nbScanInputs; ++i) { const int32_t index = nbStateVars + i; // Scan Inputs are after the state variables. 
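The RoiAlign hunk above is the first importer to record a version-compatibility plugin dependency through the new addUsedVCPluginLibrary hook, and the earlier InstanceNormalization change switches to the native-layer path when the kVERSION_COMPATIBLE parser flag is set. A minimal sketch of how an application might enable that flag, assuming the TensorRT 8.6 NvOnnxParser API introduced by this patch (error handling omitted):

```cpp
// Minimal sketch: parse an in-memory ONNX model with version-compatible import paths enabled.
// Assumes the OnnxParserFlag enum and IParser::setFlag added in this 8.6 release.
#include <memory>
#include "NvInfer.h"
#include "NvOnnxParser.h"

bool parseVersionCompatible(
    nvinfer1::INetworkDefinition& network, nvinfer1::ILogger& logger, void const* model, size_t modelSize)
{
    std::unique_ptr<nvonnxparser::IParser> parser{nvonnxparser::createParser(network, logger)};
    // Parser flags change the default import behavior; kVERSION_COMPATIBLE steers importers
    // such as InstanceNormalization onto native layers instead of plugins.
    parser->setFlag(nvonnxparser::OnnxParserFlag::kVERSION_COMPATIBLE);
    return parser->parse(model, modelSize);
}
```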
@@ -4400,7 +4356,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GridSample) // Add grid sample layer nvinfer1::IGridSampleLayer* layer = ctx->network()->addGridSample(input, grid); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); OnnxAttrs attrs(node, ctx); auto paddingMode = attrs.get("padding_mode", "zeros"); @@ -4479,7 +4435,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Shape) { nvinfer1::ITensor& input = convertToTensor(inputs.at(0), ctx); auto* layer = ctx->network()->addShape(input); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -4603,7 +4559,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Slice) nvinfer1::ISliceLayer* slice = addSlice(ctx, data, starts, sizes, steps); - ctx->registerLayer(slice, getNodeName(node)); + ctx->registerLayer(slice, node); RETURN_FIRST_OUTPUT(slice); } @@ -4660,7 +4616,7 @@ DEFINE_BUILTIN_OP_IMPORTER(SpaceToDepth) auto* firstShuffle = addShuffle(ctx, *tensorPtr, firstShapeDims); firstShuffle->setSecondTranspose(perm); - ctx->registerLayer(firstShuffle, getNodeName(node)); + ctx->registerLayer(firstShuffle, node); tensorPtr = firstShuffle->getOutput(0); // Reshape to {N, C * blockSize * blockSize, H / blockSize, W / blockSize} @@ -4767,7 +4723,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Split) } nvinfer1::ISliceLayer* slice = addSlice(ctx, inputTensor, starts, sizes, ones); - ctx->registerLayer(slice, getNodeName(node)); + ctx->registerLayer(slice, node); outputs.emplace_back(slice->getOutput(0)); } @@ -4883,7 +4839,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Tile) ShapeTensor outputShape = mul(ctx, inputDims, repeats); nvinfer1::ISliceLayer* tile = addSlice(ctx, input, similar(ctx, inputDims, 0), outputShape, similar(ctx, inputDims, 1)); - ctx->registerLayer(tile, getNodeName(node)); + ctx->registerLayer(tile, node); tile->setMode(nvinfer1::SliceMode::kWRAP); RETURN_FIRST_OUTPUT(tile); @@ -4891,24 +4847,16 @@ DEFINE_BUILTIN_OP_IMPORTER(Tile) DEFINE_BUILTIN_OP_IMPORTER(TopK) { - CHECK(notInvalidType(inputs.at(0), {"INT32", "UINT8"})); + CHECK(notInvalidType(inputs.at(0), {"UINT8"})); nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx); OnnxAttrs attrs(node, ctx); int32_t axis = attrs.get("axis", -1); - int32_t k; - if (ctx->getOpsetVersion() >= 10) - { - assertIsWeights(inputs.at(1), "This version of TensorRT only supports input K as an initializer."); - ASSERT((inputs.at(1).weights().count() == 1) && "The input K must contain exactly 1 value.", - ErrorCode::kUNSUPPORTED_NODE); - k = *static_cast(inputs.at(1).weights().values); - } - else + int32_t k{1}; + if (ctx->getOpsetVersion() < 10) { - ASSERT( (attrs.count("k")) && "Attribute k is missing.", ErrorCode::kINVALID_NODE); + ASSERT((attrs.count("k")) && "Attribute k is missing.", ErrorCode::kINVALID_NODE); k = attrs.get("k"); } - int32_t nbDims = tensorPtr->getDimensions().nbDims; CHECK(convertAxis(axis, nbDims)); uint32_t axisMask = 1 << axis; @@ -4921,6 +4869,14 @@ DEFINE_BUILTIN_OP_IMPORTER(TopK) tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); ASSERT(tensorPtr && "Failed to unsqueeze input x.", ErrorCode::kUNSUPPORTED_NODE); } + bool needCast = tensorPtr->getType() == nvinfer1::DataType::kINT32; + if (needCast) + { + LOG_WARNING( + "TensorRT is using FLOAT32 precision to run an INT32 TopK. Rounding errors may occur for large integer " + "values"); + tensorPtr = castHelper(ctx, tensorPtr, nvinfer1::DataType::kFLOAT); + } // Default is top max k. 
auto operation = nvinfer1::TopKOperation::kMAX; @@ -4933,7 +4889,14 @@ DEFINE_BUILTIN_OP_IMPORTER(TopK) } } nvinfer1::ITopKLayer* layer = ctx->network()->addTopK(*tensorPtr, operation, k, axisMask); - ctx->registerLayer(layer, getNodeName(node)); + if (ctx->getOpsetVersion() >= 10) + { + ASSERT((inputs.size() == 2) && "Expects two input tensors for opset >= 10: X and K", ErrorCode::kINVALID_NODE); + nvinfer1::ITensor* kPtr = &convertToTensor(inputs.at(1), ctx); + kPtr = convertToScalar(ctx, kPtr); + layer->setInput(1, *kPtr); + } + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to add TopK layer.", ErrorCode::kUNSUPPORTED_NODE); nvinfer1::ITensor* values = layer->getOutput(0); @@ -4949,6 +4912,11 @@ DEFINE_BUILTIN_OP_IMPORTER(TopK) ASSERT(indices && "Failed to squeeze the input indices.", ErrorCode::kUNSUPPORTED_NODE); } + if (needCast) + { + values = castHelper(ctx, values, nvinfer1::DataType::kINT32); + } + return {{values, indices}}; } @@ -5153,7 +5121,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Upsample) } layer->setScales(scale_factors.data(), nbDims); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); layer->setResizeMode(resizeMode); layer->setSelectorForSinglePixel(nvinfer1::ResizeSelector::kFORMULA); layer->setNearestRounding(nvinfer1::ResizeRoundMode::kFLOOR); @@ -5179,7 +5147,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Where) ASSERT( (cDims.nbDims == yDims.nbDims) && "The shape of the condition input tensor must be the same of the input y tensor.", ErrorCode::kUNSUPPORTED_NODE); auto* layer = ctx->network()->addSelect(*condition, *x, *y); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5388,7 +5356,7 @@ DEFINE_BUILTIN_OP_IMPORTER(Shrink) = &elementwiseHelper(ctx, node, {x, biasTensor}, nvinfer1::ElementWiseOperation::kSUM).value().at(0).tensor(); auto* layer = ctx->network()->addSelect(*xLessThanMinusLambd, *xAddBias, *output); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); // cast back to originalType return {{castHelper(ctx, layer->getOutput(0), originalType)}}; @@ -5417,7 +5385,7 @@ DEFINE_BUILTIN_OP_IMPORTER(NonZero) ASSERT((x->getType() == DataType::kFLOAT || x->getType() == DataType::kHALF || x->getType() == DataType::kINT32 || x->getType() == DataType::kINT8 || x->getType() == DataType::kBOOL) && "Only FLOAT, HALF, INT32, INT8 or BOOL input is supported for the NonZero operator in this version of TensorRT", ErrorCode::kUNSUPPORTED_NODE); auto* layer = ctx->network()->addNonZero(*x); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5431,7 +5399,7 @@ DEFINE_BUILTIN_OP_IMPORTER(FallbackPluginImporter) LOG_INFO("Searching for plugin: " << pluginName << ", plugin_version: " << pluginVersion << ", plugin_namespace: " << pluginNamespace); - nvinfer1::IPluginCreator* creator = importPluginCreator(pluginName, pluginVersion, pluginNamespace); + nvinfer1::IPluginCreator* creator = importPluginCreator(ctx, pluginName, pluginVersion, pluginNamespace); ASSERT(creator && "Plugin not found, are the plugin name, version, and namespace correct?", ErrorCode::kUNSUPPORTED_NODE); @@ -5450,7 +5418,7 @@ DEFINE_BUILTIN_OP_IMPORTER(FallbackPluginImporter) } LOG_INFO("Successfully created plugin: " << pluginName); auto* layer = ctx->network()->addPluginV2(pluginInputs.data(), pluginInputs.size(), *plugin); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); 
RETURN_ALL_OUTPUTS(layer); } @@ -5498,7 +5466,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Scale) } nvinfer1::IScaleLayer* layer = ctx->network()->addScale(input, mode, shift, scale, power); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5513,7 +5481,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Shuffle) bool zeroIsPlaceholder = attrs.get("zero_is_placeholder"); nvinfer1::IShuffleLayer* layer = ctx->network()->addShuffle(input); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); layer->setFirstTranspose(perm1); layer->setSecondTranspose(perm2); layer->setZeroIsPlaceholder(zeroIsPlaceholder); @@ -5548,7 +5516,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_TopK_Min) int32_t axes = 1 << (attrs.get("axis")); nvinfer1::ITopKLayer* layer = ctx->network()->addTopK(input, nvinfer1::TopKOperation::kMIN, k, axes); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_ALL_OUTPUTS(layer); } @@ -5565,7 +5533,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_MatMul) nvinfer1::MatrixOperation op1 = attrs.get("op_1"); nvinfer1::IMatrixMultiplyLayer* layer = ctx->network()->addMatrixMultiply(input0, op0, input1, op1); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5600,7 +5568,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_RNNv2) int32_t counter = 1; nvinfer1::IRNNv2Layer* layer = ctx->network()->addRNNv2(input, layerCount, hiddenSize, maxSeqLen, op); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); layer->setInputMode(inputMode); layer->setDirection(direction); @@ -5674,7 +5642,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_RaggedSoftmax) auto& bounds = inputs.at(1).tensor(); nvinfer1::IRaggedSoftMaxLayer* layer = ctx->network()->addRaggedSoftMax(input, bounds); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5699,7 +5667,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_FullyConnected) nvinfer1::IFullyConnectedLayer* layer = ctx->network()->addFullyConnected(input, nbChannels, kernelWeights, biasWeights); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5722,7 +5690,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_MaxAverageBlendPool) nvinfer1::IPoolingLayer* layer = ctx->network()->addPoolingNd(input, nvinfer1::PoolingType::kMAX_AVERAGE_BLEND, kernelSize); ASSERT(layer && "Failed to create a Pooling layer.", ErrorCode::kUNSUPPORTED_NODE); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); layer->setStrideNd(strides); layer->setAverageCountExcludesPadding(exclude_padding); layer->setPaddingMode(paddingMode); @@ -5760,7 +5728,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_PluginV2) auto const plugin = creator->deserializePlugin("", buffer.data(), buffer.size()); nvinfer1::IPluginV2Layer* layer = ctx->network()->addPluginV2(tensors.data(), tensors.size(), *plugin); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_ALL_OUTPUTS(layer); } #endif // ENABLE_STD_PLUGIN @@ -5788,7 +5756,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Gather) } nvinfer1::IGatherLayer* layer = ctx->network()->addGather(data, indices, axis); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); layer->setNbElementWiseDims(nbElementWiseDims); RETURN_FIRST_OUTPUT(layer); } @@ -5818,7 +5786,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Slice) ShapeTensor const stride{ctx, inputs.at(3)}; 
layer = addSlice(ctx, input, start, size, stride); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); RETURN_FIRST_OUTPUT(layer); } @@ -5829,7 +5797,7 @@ DEFINE_BUILTIN_OP_IMPORTER(TRT_Resize) nvinfer1::IResizeLayer* layer; layer = ctx->network()->addResize(input); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); OnnxAttrs attrs(node, ctx); auto const mode = attrs.get("mode"); diff --git a/docs/Changelog.md b/docs/Changelog.md index db420c05..468ae969 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -2,6 +2,23 @@ # ONNX-TensorRT Changelog +# TensorRT 8.6 EA Release - 2023-3-13 + +## Added + +For more details, see the 8.6 EA release notes for new features added in TensorRT 8.6. + +- Added support for `GroupNormalization`, `LayerNormalization`, `IsInf` operations +- Added support for INT32 input types for `Argmin`, `Argmax`, and `TopK` +- Added support for `ReverseSequence` operators with dynamic shapes +- Added support for `TopK` operators with dynamic `K` values +- Added `OnnxParserFlag` enum and `setFlag` interfaces to the ONNX parser to modify the default parsing behavior +- Added metadata tracking: ONNX node metadata will now be embedded into TensorRT layers + +## Changed + +- All cast operations will now use the new `CastLayer` over the previous `IdentityLayer`. + # TensorRT 8.5 GA Release - 2022-11-2 ## Added diff --git a/docs/operators.md b/docs/operators.md index 26ebcb3b..6ab1651d 100644 --- a/docs/operators.md +++ b/docs/operators.md @@ -2,7 +2,7 @@ # Supported ONNX Operators -TensorRT 8.5 supports operators up to Opset 17. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/master/docs/Operators.md) +TensorRT 8.6 supports operators up to Opset 17. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/master/docs/Operators.md) TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, INT8, and BOOL @@ -19,8 +19,8 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | Acosh | Y | FP32, FP16 | | Add | Y | FP32, FP16, INT32 | | And | Y | BOOL | -| ArgMax | Y | FP32, FP16 | -| ArgMin | Y | FP32, FP16 | +| ArgMax | Y | FP32, FP16, INT32 | +| ArgMin | Y | FP32, FP16, INT32 | | Asin | Y | FP32, FP16 | | Asinh | Y | FP32, FP16 | | Atan | Y | FP32, FP16 | @@ -30,7 +30,7 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | Bernoulli | N | | BitShift | N | | BlackmanWindow | N | -| Cast | Y | FP32, FP16, INT32, INT8, BOOL | | +| Cast | Y | FP32, FP16, INT32, INT8, UINT8, BOOL | | | Ceil | Y | FP32, FP16 | | Celu | Y | FP32, FP16 | | Clip | Y | FP32, FP16, INT8 | | @@ -70,7 +70,8 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | GlobalMaxPool | Y | FP32, FP16, INT8 | | Greater | Y | FP32, FP16, INT32 | | GreaterOrEqual | Y | FP32, FP16, INT32 | -| GridSample | Y | FP32, FP16 +| GridSample | Y | FP32, FP16 | +| GroupNormalization | Y | FP32, FP16 | | GRU | Y | FP32, FP16 | For bidirectional GRUs, activation functions must be the same for both the forward and reverse pass | HammingWindow | N | | HannWindow | N | @@ -80,10 +81,10 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT.
| Identity | Y | FP32, FP16, INT32, INT8, BOOL | | If | Y | FP32, FP16, INT32, BOOL | Output tensors of the two conditional branches must have broadcastable shapes, and must have different names | ImageScaler | Y | FP32, FP16 | -| InstanceNormalization | Y | FP32, FP16 | Scales `scale` and biases `B` must be initializers. Input rank must be >=3 & <=5 | -| IsInf | N | +| InstanceNormalization | Y | FP32, FP16 | +| IsInf | Y | FP32, FP16 | IsNaN | Y | FP32, FP16, INT32 | -| LayerNormalization | N | +| LayerNormalization | Y | FP32, FP16 | LeakyRelu | Y | FP32, FP16, INT8 | | Less | Y | FP32, FP16, INT32 | | LessOrEqual | Y | FP32, FP16, INT32 | @@ -143,7 +144,7 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | Relu | Y | FP32, FP16, INT8 | | Reshape | Y | FP32, FP16, INT32, INT8, BOOL | | Resize | Y | FP32, FP16 | Supported resize transformation modes: `half_pixel`, `pytorch_half_pixel`, `tf_half_pixel_for_nn`, `asymmetric`, and `align_corners`.
Supported resize modes: `nearest`, `linear`.
Supported nearest modes: `floor`, `ceil`, `round_prefer_floor`, `round_prefer_ceil` | -| ReverseSequence | Y | FP32, FP16 | Dynamic input shapes are unsupported +| ReverseSequence | Y | FP32, FP16, INT32, INT8, BOOL | | RNN | Y | FP32, FP16 | For bidirectional RNNs, activation functions must be the same for both the forward and reverse pass | RoiAlign | Y | FP32, FP16 | | Round | Y | FP32, FP16, INT8 | @@ -186,7 +187,7 @@ See below for the support matrix of ONNX operators in ONNX-TensorRT. | TfIdfVectorizer | N | | ThresholdedRelu | Y | FP32, FP16, INT8 | | Tile | Y | FP32, FP16, INT32, BOOL | -| TopK | Y | FP32, FP16 | `K` input must be an initializer +| TopK | Y | FP32, FP16, INT32 | | Transpose | Y | FP32, FP16, INT32, INT8, BOOL | | Trilu | Y | FP32, FP16, INT32, INT8, BOOL | | Unique | N | diff --git a/onnx2trt.hpp b/onnx2trt.hpp index e4783875..4a732c2c 100644 --- a/onnx2trt.hpp +++ b/onnx2trt.hpp @@ -47,9 +47,15 @@ class IImporterContext virtual StringMap& loopTensors() = 0; virtual void setOnnxFileLocation(std::string location) = 0; virtual std::string getOnnxFileLocation() = 0; - virtual void registerTensor(TensorOrWeights tensor, const std::string& basename, bool const checkUniqueName = false) - = 0; - virtual void registerLayer(nvinfer1::ILayer* layer, const std::string& basename) = 0; + virtual void registerTensor(TensorOrWeights tensor, std::string const& basename, bool const checkUniqueName = false) = 0; + + //! Register a layer, which ensures it has a unique name. + //! If node!=nullptr, set the metadata for the layer to the node's name. + virtual void registerLayer(nvinfer1::ILayer* layer, std::string const& basename, ::ONNX_NAMESPACE::NodeProto const* node) = 0; + + //! Short form of register layer to use when the basename is the node's name. + virtual void registerLayer(nvinfer1::ILayer* layer, ::ONNX_NAMESPACE::NodeProto const& node) = 0; + virtual ShapedWeights createTempWeights(ShapedWeights::DataType type, nvinfer1::Dims shape, uint8_t value = 0) = 0; virtual int64_t getOpsetVersion(const char* domain = "") const = 0; virtual nvinfer1::ILogger& logger() = 0; @@ -57,11 +63,25 @@ class IImporterContext virtual nvinfer1::IErrorRecorder* getErrorRecorder() const = 0; virtual nvinfer1::IConstantLayer* getConstantLayer(const char* name) const = 0; + virtual void setFlags(nvonnxparser::OnnxParserFlags const& onnxParserFlags) = 0; + virtual nvonnxparser::OnnxParserFlags getFlags() const = 0; + //! Push a new scope for base names (ONNX names). virtual void pushBaseNameScope() = 0; //! Revert actions of registerTensor for names in the top scope and pop it. virtual void popBaseNameScope() = 0; + + //! Declare the given node requires a plugin library for the given pluginName, which is provided by the + //! logical library name pluginLib (should correspond to the DLL/DSO name with suffix and "lib" prefix stripped, + //! e.g. nvinfer_vc_plugin for libnvinfer_vc_plugin.so.8). + virtual void addUsedVCPluginLibrary( + ::ONNX_NAMESPACE::NodeProto const& node, char const* pluginName, char const* pluginLib) + = 0; + + // Returns a list of strings corresponding to paths to the used VC plugins on disk. May throw on error. 
+ virtual std::vector getUsedVCPluginLibraries() = 0; + protected: virtual ~IImporterContext() {} }; diff --git a/onnx2trt_utils.cpp b/onnx2trt_utils.cpp index e981aa07..b7d19a34 100644 --- a/onnx2trt_utils.cpp +++ b/onnx2trt_utils.cpp @@ -4,6 +4,7 @@ #include "onnx2trt_utils.hpp" #include "OnnxAttrs.hpp" +#include "NvInferSafeRuntime.h" #include namespace onnx2trt @@ -39,7 +40,7 @@ NodeImportResult activationHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE: { layer->setBeta(*beta); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); return {{layer->getOutput(0)}}; } @@ -58,19 +59,29 @@ nvinfer1::ITensor* addClip(IImporterContext* ctx, nvinfer1::ITensor* input, floa NodeImportResult argMinMaxHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, std::vector& inputs, nvinfer1::TopKOperation op) { - CHECK(notInvalidType(inputs.at(0), {"INT32", "UINT8"})); - nvinfer1::ITensor& tensor = convertToTensor(inputs.at(0), ctx); + CHECK(notInvalidType(inputs.at(0), {"UINT8"})); + nvinfer1::ITensor* tensor = &convertToTensor(inputs.at(0), ctx); + + bool needCast = tensor->getType() == nvinfer1::DataType::kINT32; + if (needCast) + { + LOG_WARNING( + "TensorRT is using FLOAT32 precision to run an INT32 ArgMax / ArgMin. Rounding errors may occur for large " + "integer values"); + tensor = castHelper(ctx, tensor, nvinfer1::DataType::kFLOAT); + } + // Get attributes. OnnxAttrs attrs(node, ctx); - int keepdims = attrs.get("keepdims", 1); - int axis = attrs.get("axis", 0); - int selectLastIndex = attrs.get("select_last_index", 0); + int32_t keepdims = attrs.get("keepdims", 1); + int32_t axis = attrs.get("axis", 0); + int32_t selectLastIndex = attrs.get("select_last_index", 0); ASSERT((!selectLastIndex || (selectLastIndex && ctx->getOpsetVersion() >= 12)) && "Per-opset 12 ONNX does not support the select_last_index attribute.", ErrorCode::kUNSUPPORTED_NODE); // Insert a TopK layer with k set to 1. - int nbDims = tensor.getDimensions().nbDims; + int32_t nbDims = tensor->getDimensions().nbDims; CHECK(convertAxis(axis, nbDims)); uint32_t axisMask = 1 << axis; nvinfer1::ITopKLayer* layer; @@ -81,7 +92,7 @@ NodeImportResult argMinMaxHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE:: if (selectLastIndex) { // Need to flip the data input along the given axis using the Slice operator - const auto dims = shapeOf(tensor); + auto const dims = shapeOf(*tensor); ShapeTensor starts = shapeVector(-1); ShapeTensor ends = shapeVector(static_cast(INT_MIN)); ShapeTensor axes = shapeVector(axis); @@ -91,26 +102,27 @@ NodeImportResult argMinMaxHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE:: { // axes specify a subset of the dimensions, or out of order. // Convert starts/ends/steps to complete in-order form. - const ShapeTensor subscripts{axesToInterlaceSubscripts(axes, dims.size())}; + ShapeTensor const subscripts{axesToInterlaceSubscripts(axes, dims.size())}; starts = interlace(ctx, similar(ctx, dims, 0), starts, subscripts); ends = interlace(ctx, dims, ends, subscripts); steps = interlace(ctx, similar(ctx, dims, 1), steps, subscripts); } decodeOnnxStartsAndEnds(ctx, dims, steps, starts, ends); // TensorRT uses sizes of the output dimensions instead of ends. 
- const ShapeTensor sizes = computeSliceSizes(ctx, starts, ends, steps, dims); + ShapeTensor const sizes = computeSliceSizes(ctx, starts, ends, steps, dims); - nvinfer1::ISliceLayer* slice = addSlice(ctx, tensor, starts, sizes, steps); + nvinfer1::ISliceLayer* slice = addSlice(ctx, *tensor, starts, sizes, steps); nvinfer1::ITensor& flippedTensor = *slice->getOutput(0); layer = ctx->network()->addTopK(flippedTensor, op, 1, axisMask); } else { - layer = ctx->network()->addTopK(tensor, op, 1, axisMask); + layer = ctx->network()->addTopK(*tensor, op, 1, axisMask); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); + // We don't care about the TopK values, just the indices. nvinfer1::ITensor* indices = layer->getOutput(1); indices->setType(nvinfer1::DataType::kINT32); @@ -121,7 +133,7 @@ NodeImportResult argMinMaxHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE:: if (selectLastIndex) { // Use shapeTensor semantics to support dynamic shapes - auto const dims = shapeOf(tensor); + auto const dims = shapeOf(*tensor); auto const indicesDims = shapeOf(*indices); auto const axisTensor = shapeVector(axis); auto const dimOnAxis = gather(ctx, dims, axisTensor); @@ -143,7 +155,7 @@ NodeImportResult argMinMaxHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE:: else { // Otherwise, we need to squeeze the axis dimension - std::vector axes{axis}; + std::vector axes{axis}; indices = squeezeTensor(ctx, node, *indices, axes); return {{indices}}; } @@ -310,28 +322,13 @@ nvinfer1::ITensor* castHelper(IImporterContext* ctx, nvinfer1::ITensor* input, n nvinfer1::ITensor* constantOfShape(IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, nvinfer1::ITensor* constant, nvinfer1::ITensor* shape) { - int rank = shape->getDimensions().d[0]; - - std::vector starts(rank); - std::fill(starts.begin(), starts.end(), 0); - - nvinfer1::Dims strides{rank}; - std::fill(strides.d, strides.d + strides.nbDims, 0); - - // Slice will not work if constant does not have the same rank as start/size/strides. - nvinfer1::Dims unsqueezeDims{rank}; - std::fill(unsqueezeDims.d, unsqueezeDims.d + unsqueezeDims.nbDims, 1); - nvinfer1::IShuffleLayer* unsqueeze = ctx->network()->addShuffle(*constant); - unsqueeze->setReshapeDimensions(unsqueezeDims); - unsqueeze->setZeroIsPlaceholder(false); - constant = unsqueeze->getOutput(0); - - nvinfer1::ISliceLayer* broadcast = ctx->network()->addSlice(*constant, nvinfer1::Dims{}, nvinfer1::Dims{}, strides); - broadcast->setInput(1, - *addConstant(ctx, starts, ::ONNX_NAMESPACE::TensorProto_DataType_INT32, nvinfer1::Dims{1, rank})->getOutput(0)); - broadcast->setInput(2, *shape); - ctx->registerLayer(broadcast, getNodeName(node)); - return broadcast->getOutput(0); + ShapeTensor shapeT{*shape}; + ShapeTensor zeros = similar(ctx, shapeT, 0); + // `constant` must be broadcasted to the same rank as `shape`. 
+ ShapeTensor broadcastedShape = similar(ctx, shapeT, 1); + constant = &reshape(ctx, *constant, broadcastedShape); + auto l = addSlice(ctx, *constant, zeros, shapeT, zeros); + return l->getOutput(0); } Status convertAxis(int& axis, int nbDims) @@ -369,13 +366,13 @@ bool convertDtype(int32_t onnx_dtype, nvinfer1::DataType* trt_dtype) int32_t* convertINT64(const int64_t* weightValues, nvinfer1::Dims shape, IImporterContext* ctx) { - static bool logged = false; - if (!logged) + auto ctxImpl = static_cast(ctx); + if (!ctxImpl->isConvertINT64Logged()) { LOG_WARNING( "Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. " "Attempting to cast down to INT32."); - logged = true; + ctxImpl->setConvertINT64Logged(true); } const size_t nbWeights = volume(shape); @@ -398,9 +395,10 @@ int32_t* convertINT64(const int64_t* weightValues, nvinfer1::Dims shape, IImport int32Weights[i] = static_cast(weightValues[i]); } } - if (outOfBounds) + if (outOfBounds && !ctxImpl->isConvertINT64OutOfBoundsLogged()) { LOG_WARNING("One or more weights outside the range of INT32 was clamped"); + ctxImpl->setConvertINT64OutOfBoundsLogged(true); } return int32Weights; @@ -507,14 +505,14 @@ int32_t* convertUINT8(const uint8_t* weightValues, nvinfer1::Dims shape, IImport float* convertDouble(const double* weightValues, nvinfer1::Dims shape, IImporterContext* ctx) { - static bool logged = false; - if (!logged) + auto ctxImpl = static_cast(ctx); + if (!ctxImpl->isConvertDoubleLogged()) { LOG_WARNING( "Your ONNX model has been generated with double-typed weights, while TensorRT does not natively support " "double. " "Attempting to cast down to float."); - logged = true; + ctxImpl->setConvertDoubleLogged(true); } const size_t nbWeights = volume(shape); float* floatWeights{ @@ -537,9 +535,10 @@ float* convertDouble(const double* weightValues, nvinfer1::Dims shape, IImporter floatWeights[i] = static_cast(weightValues[i]); } } - if (outOfBounds) + if (outOfBounds && !ctxImpl->isConvertDoubleOutOfBoundsLogged()) { LOG_WARNING("One or more weights outside the range of FLOAT was clamped"); + ctxImpl->setConvertDoubleOutOfBoundsLogged(true); } return floatWeights; @@ -605,42 +604,50 @@ bool convertOnnxWeights( // For weights parsed from external files, createTempWeights is necessary to keep them in scope ShapedWeights externalWeights; + dataPtr = dataBuf.data(); // Cast non-native TRT types to their corresponding proxy types if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::INT64) { - dataPtr = dataBuf.data(); + // Cast INT64 weights to INT32. dataPtr = convertINT64(reinterpret_cast(dataPtr), shape, ctx); nbytes = nbytes / (sizeof(int64_t) / sizeof(int32_t)); onnxDtype = ::ONNX_NAMESPACE::TensorProto::INT32; - externalWeights = ctx->createTempWeights(onnxDtype, shape); - std::memcpy(externalWeights.values, dataPtr, nbytes); } else if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::UINT8) { - dataPtr = dataBuf.data(); + // Cast UINT8 weights to INT32. dataPtr = convertUINT8(reinterpret_cast(dataPtr), shape, ctx); nbytes = nbytes * (sizeof(int32_t) / sizeof(uint8_t)); onnxDtype = ::ONNX_NAMESPACE::TensorProto::INT32; - externalWeights = ctx->createTempWeights(onnxDtype, shape); - std::memcpy(externalWeights.values, dataPtr, nbytes); } else if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::DOUBLE) { - dataPtr = dataBuf.data(); + // Cast DOUBLE weights to FLOAT. 
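// Editorial note (not part of the patch): convertDouble clamps out-of-range values instead of
// failing; assuming the clamping logic elided by the diff context, roughly
//   1e40  -> std::numeric_limits<float>::max()
//  -1e40  -> std::numeric_limits<float>::lowest()
// and with the logged-flags now stored on the ImporterContext (isConvertDoubleLogged, etc.)
// each warning is emitted once per parse rather than once per process, so independent parses
// in the same process each get their own warning.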
dataPtr = convertDouble(reinterpret_cast(dataPtr), shape, ctx); nbytes = nbytes / (sizeof(double) / sizeof(float)); onnxDtype = ::ONNX_NAMESPACE::TensorProto::FLOAT; - externalWeights = ctx->createTempWeights(onnxDtype, shape); - std::memcpy(externalWeights.values, dataPtr, nbytes); } - // Copy weight values directly to externalWeights - else + + // Create the holder for external weights. + externalWeights = ctx->createTempWeights(onnxDtype, shape); + + // Check if the size of external weights is as expected. + if (externalWeights.size_bytes() != nbytes) { - externalWeights = ctx->createTempWeights(onnxDtype, shape); - std::memcpy(externalWeights.values, dataBuf.data(), nbytes); + LOG_ERROR("Unexpected size for the external weights! Expected size: " + << externalWeights.size_bytes() + << " bytes (shape = " + << shape + << "). Actual size: " + << nbytes + << " bytes."); + return false; } + // Copy the weight values into externalWeights. + std::memcpy(externalWeights.values, dataPtr, nbytes); + *weights = externalWeights; return true; } @@ -790,7 +797,7 @@ nvinfer1::ITensor& convertToTensor(TensorOrWeights& input, IImporterContext* ctx // Register layer and constant name (if set) into RefitMap: if (weights.getName()) { - ctx->registerLayer(constantLayer, weights.getName()); + ctx->registerLayer(constantLayer, weights.getName(), nullptr); ctx->network()->setWeightsName(weights, weights.getName()); } return *(constantLayer->getOutput(0)); @@ -900,7 +907,7 @@ NodeImportResult elementwiseHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::Node && "The number of dimensions should remain the same adding inputs.", ErrorCode::kUNSUPPORTED_NODE); auto* layer = ctx->network()->addElementWise(*combined, *tensor, binary_op); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ASSERT(layer && "Failed to register layer.", ErrorCode::kUNSUPPORTED_NODE); combined = layer->getOutput(0); } @@ -918,7 +925,7 @@ nvinfer1::ITensor* flattenTensor( nvinfer1::IShuffleLayer* flattenLayer = addShuffle(ctx, tensor, concat(ctx, d0, d1), /*zeroIsPlaceholder=*/false); if (regLayer) { - ctx->registerLayer(flattenLayer, getNodeName(node)); + ctx->registerLayer(flattenLayer, node); } return flattenLayer->getOutput(0); } @@ -1172,7 +1179,7 @@ nvinfer1::ITensor* globalPoolingHelper(IImporterContext* ctx, ::ONNX_NAMESPACE:: // Generate a bitmask of all 1s except the last 2 bits (N and C axes) uint32_t reduceAxes = ((1 << dims.nbDims) - 1) & ~0b11; auto* layer = ctx->network()->addReduce(tensor, op, reduceAxes, /*keepDimensions=*/true); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); return layer->getOutput(0); } @@ -1185,21 +1192,18 @@ nvinfer1::ITensor* greaterLessOrEqual(IImporterContext* ctx, const ::ONNX_NAMESP return result; } -nvinfer1::IPluginCreator* importPluginCreator( - const std::string& pluginName, const std::string& pluginVersion, const std::string& pluginNamespace) +nvinfer1::IPluginCreator* importPluginCreator(IImporterContext* ctx, std::string const& pluginName, + std::string const& pluginVersion, std::string const& pluginNamespace) { nvinfer1::IPluginCreator* creator = nullptr; #if ENABLE_STD_PLUGIN - auto pluginRegistry = nvinfer1::getBuilderPluginRegistry(nvinfer1::EngineCapability::kSTANDARD); - if (pluginRegistry != nullptr) - { - creator = pluginRegistry->getPluginCreator(pluginName.c_str(), pluginVersion.c_str(), pluginNamespace.c_str()); - } + auto& pluginRegistry = ctx->network()->getBuilder().getPluginRegistry(); + creator = 
+        pluginRegistry.getPluginCreator(pluginName.c_str(), pluginVersion.c_str(), pluginNamespace.c_str());
 #endif // ENABLE_STD_PLUGIN
 #if ENABLE_SAFE_PLUGIN
-    auto safetyPluginRegistry = nvinfer1::getBuilderPluginRegistry(nvinfer1::EngineCapability::kSAFETY);
+    auto safetyPluginRegistry = nvinfer1::getBuilderSafePluginRegistry(nvinfer1::EngineCapability::kSAFETY);
     if (creator == nullptr && safetyPluginRegistry != nullptr)
     {
         creator = safetyPluginRegistry->getPluginCreator(
@@ -1230,6 +1234,66 @@ bool isDynamic(const nvinfer1::Dims& shape)
     return std::any_of(shape.d, shape.d + shape.nbDims, [](int dim) { return dim < 0; });
 }
 
+NodeImportResult instanceNormPluginHelper(
+    IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector<TensorOrWeights>& inputs)
+{
+    // Scales and biases must be initializers
+    ASSERT(inputs.at(1).is_weights() && "The scale tensor is required to be an initializer.",
+        ErrorCode::kUNSUPPORTED_NODE);
+    ASSERT(
+        inputs.at(2).is_weights() && "The bias tensor is required to be an initializer.", ErrorCode::kUNSUPPORTED_NODE);
+    nvinfer1::ITensor* tensorPtr = &convertToTensor(inputs.at(0), ctx);
+    int32_t nbDims = tensorPtr->getDimensions().nbDims;
+    ASSERT(nbDims >= 3 && nbDims <= 5 && "TensorRT only supports InstanceNormalization on 3D, 4D, or 5D tensors!",
+        ErrorCode::kUNSUPPORTED_NODE);
+
+    const bool needToExpandDims = (nbDims == 3);
+    if (needToExpandDims)
+    {
+        // Expand spatial dims from 1D to 2D
+        std::vector<int32_t> const axes{3};
+        tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes);
+        ASSERT(tensorPtr && "Failed to unsqueeze tensor.", ErrorCode::kUNSUPPORTED_NODE);
+    }
+    auto scale_weights = inputs.at(1).weights();
+    auto bias_weights = inputs.at(2).weights();
+    OnnxAttrs attrs(node, ctx);
+    float epsilon = attrs.get("epsilon", 1e-5F);
+    int32_t const relu{0};  // the ONNX instance norm op does not use the relu parameter
+    float const alpha{0.F}; // the ONNX instance norm op does not use the alpha parameter
+
+    // Populate instanceNormalization plugin properties.
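// Editorial sketch (not part of the patch): the fields assembled below reach the plugin
// creator as a PluginFieldCollection, roughly
//   nvinfer1::PluginFieldCollection fc{static_cast<int32_t>(f.size()), f.data()};
//   nvinfer1::IPluginV2* p = creator->createPlugin(getNodeName(node).c_str(), &fc);
// which is what the createPlugin() helper used a few lines down wraps. Each PluginField
// carries a name, a pointer to the data, a type tag, and an element count, so "scales" and
// "bias" reference the initializer weights without copying them.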
+    std::string const pluginName = "InstanceNormalization_TRT";
+    std::string const pluginVersion = "1";
+    std::vector<nvinfer1::PluginField> f;
+    f.emplace_back("epsilon", &epsilon, nvinfer1::PluginFieldType::kFLOAT32, 1);
+    f.emplace_back("scales", scale_weights.values, nvinfer1::PluginFieldType::kFLOAT32, scale_weights.count());
+    f.emplace_back("bias", bias_weights.values, nvinfer1::PluginFieldType::kFLOAT32, bias_weights.count());
+    f.emplace_back("relu", &relu, nvinfer1::PluginFieldType::kINT32, 1);
+    f.emplace_back("alpha", &alpha, nvinfer1::PluginFieldType::kFLOAT32, 1);
+
+    // Create plugin from registry
+    auto const plugin = createPlugin(getNodeName(node), importPluginCreator(ctx, pluginName, pluginVersion), f);
+
+    ASSERT(plugin != nullptr && "InstanceNormalization plugin was not found in the plugin registry!",
+        ErrorCode::kUNSUPPORTED_NODE);
+
+    auto* layer = ctx->network()->addPluginV2(&tensorPtr, 1, *plugin);
+    ctx->registerLayer(layer, node);
+    tensorPtr = layer->getOutput(0);
+
+    if (needToExpandDims)
+    {
+        // Un-expand spatial dims back to 1D
+        std::vector<int32_t> const axes{3};
+        tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes);
+        ASSERT(tensorPtr && "Failed to squeeze tensor.", ErrorCode::kUNSUPPORTED_NODE);
+    }
+
+    return {{tensorPtr}};
+}
+
 nvinfer1::ITensor* iota(IImporterContext* ctx, ShapeTensor iotaDims, int32_t axis)
 {
     std::vector<int64_t> deltaVals(iotaDims.size(), 0);
@@ -1549,6 +1613,49 @@ nvinfer1::Dims makeDims(int nbDims, int val)
     return dims;
 }
 
+NodeImportResult normalizationHelper(
+    IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, std::vector<TensorOrWeights>& inputs)
+{
+    auto* input = &convertToTensor(inputs.at(0), ctx);
+    auto* scale = &convertToTensor(inputs.at(1), ctx);
+    auto* bias = &convertToTensor(inputs.at(2), ctx);
+
+    OnnxAttrs attrs(node, ctx);
+    float epsilon = attrs.get("epsilon", 1e-5f);
+    int32_t nbGroups = attrs.get("num_groups", 1);
+
+    auto nbDims = input->getDimensions().nbDims;
+    ASSERT(nbDims >= 3 && "Input to normalization should be at least 3D!", ErrorCode::kINVALID_NODE);
+
+    // Need to broadcast scale and bias to the input shape. Note that normal broadcasting rules cannot be applied
+    // as scale and bias are 1D and need to be broadcasted to shape [1, S, 1, 1, ...].
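// Editorial worked example (not part of the patch): for a 4-D NCHW input (nbDims = 4) the
// loop below skips i == 1 (the channel axis that scale/bias already cover), giving
//   unsqueezeAxes = {0, 2, 3}  -> a 1-D scale of shape [S] becomes [1, S, 1, 1]
//   axesMask      = (1 << 2) | (1 << 3) = 0b1100
// so the INormalizationLayer reduces over the spatial axes H and W, matching
// InstanceNormalization; GroupNormalization reuses the same helper and differs only in the
// setNbGroups() call further down.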
+    uint32_t axesMask{0};
+    std::vector<int32_t> unsqueezeAxes;
+
+    for (int32_t i = 0; i < nbDims; i++)
+    {
+        if (i == 1)
+        {
+            continue;
+        }
+        // Axes should correspond to the spatial dimensions
+        if (i >= 2)
+        {
+            axesMask |= 1 << i;
+        }
+        unsqueezeAxes.push_back(i);
+    }
+
+    scale = unsqueezeTensor(ctx, node, *scale, unsqueezeAxes);
+    bias = unsqueezeTensor(ctx, node, *bias, unsqueezeAxes);
+
+    auto* layer = ctx->network()->addNormalization(*input, *scale, *bias, axesMask);
+    layer->setEpsilon(epsilon);
+    layer->setNbGroups(nbGroups);
+    ctx->registerLayer(layer, node);
+    return {{layer->getOutput(0)}};
+}
+
 nvinfer1::Dims insertDimension(const nvinfer1::Dims& dims, const int axis, const int value)
 {
     assert(dims.nbDims < nvinfer1::Dims::MAX_DIMS && axis < nvinfer1::Dims::MAX_DIMS);
@@ -1682,7 +1789,7 @@ NodeImportResult poolingHelper(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProt
     poolingLayer->setPrePadding(beg_padding);
     poolingLayer->setPostPadding(end_padding);
 
-    ctx->registerLayer(poolingLayer, getNodeName(node));
+    ctx->registerLayer(poolingLayer, node);
     tensorPtr = poolingLayer->getOutput(0);
     dims = tensorPtr->getDimensions();
     if (needToExpandDims)
@@ -1726,7 +1833,7 @@ NodeImportResult reduceTensor(IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto
     }
 
     auto* layer = ctx->network()->addReduce(tensor, operation, axisMask, keepdims);
-    ctx->registerLayer(layer, getNodeName(node));
+    ctx->registerLayer(layer, node);
     return {{layer->getOutput(0)}};
 }
 
@@ -1776,7 +1883,7 @@ NodeImportResult scaleHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::Node
     auto* layer = ctx->network()->addScaleNd(*tensorPtr, mode, shift, scale, power, 1);
     ASSERT(layer && "Failed to add a Scale layer.", ErrorCode::kUNSUPPORTED_NODE);
     // Register layer name, and shift and scale weight names for the refit map.
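// Editorial sketch (not part of the patch; `engine`, `logger` and `newShiftWeights` are
// assumed to exist): the names registered with setWeightsName() are what a caller would
// later pass to the refitter of a refittable engine, e.g.
//   nvinfer1::IRefitter* refitter = nvinfer1::createInferRefitter(*engine, logger);
//   refitter->setNamedWeights(shiftName.c_str(), newShiftWeights);
//   refitter->refitCudaEngine();
// which lets the Scale shift/scale weights be updated without re-parsing the ONNX model.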
- ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); ctx->network()->setWeightsName(shift, shiftName); ctx->network()->setWeightsName(scale, scaleName); @@ -1787,7 +1894,6 @@ NodeImportResult scaleHelper(IImporterContext* ctx, const ::ONNX_NAMESPACE::Node tensorPtr = &reshape(ctx, *tensorPtr, origShape); ASSERT(tensorPtr && "Failed to reshape tensor.", ErrorCode::kUNSUPPORTED_NODE); } - return {{tensorPtr}}; } @@ -1858,7 +1964,7 @@ nvinfer1::ITensor* squeezeTensor(IImporterContext* ctx, const ::ONNX_NAMESPACE:: nvinfer1::IShuffleLayer* squeezeLayer = addShuffle(ctx, tensor, newDims); if (regLayer) { - ctx->registerLayer(squeezeLayer, getNodeName(node)); + ctx->registerLayer(squeezeLayer, node); } return squeezeLayer->getOutput(0); } @@ -1869,7 +1975,7 @@ nvinfer1::ITensor* transposeTensor(IImporterContext* ctx, const ::ONNX_NAMESPACE const nvinfer1::Dims shape = tensor.getDimensions(); nvinfer1::IShuffleLayer* layer = ctx->network()->addShuffle(tensor); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); if (!layer) { return nullptr; @@ -1899,7 +2005,6 @@ NodeImportResult unaryHelper( IImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, TensorOrWeights& input, nvinfer1::UnaryOperation op) { nvinfer1::ITensor* tensorPtr = &convertToTensor(input, ctx); - const auto rank = tensorPtr->getDimensions().nbDims; const auto inputType = tensorPtr->getType(); bool validUnaryType = true; @@ -1930,6 +2035,11 @@ NodeImportResult unaryHelper( validUnaryType = (inputType != nvinfer1::DataType::kBOOL && inputType != nvinfer1::DataType::kUINT8); break; } + case nvinfer1::UnaryOperation::kISINF: + { + validUnaryType = (inputType == nvinfer1::DataType::kFLOAT || inputType == nvinfer1::DataType::kHALF); + break; + } default: { // By default TRT does not support BOOL, INT32, UINT8 types for Unary operations. @@ -1941,24 +2051,10 @@ NodeImportResult unaryHelper( && "This version of TensorRT does not support the given operator with the given input data type.", ErrorCode::kUNSUPPORTED_NODE); - // Support scalar inputs by unsqueezing to 1D - if (rank == 0) - { - std::vector axes{0}; - tensorPtr = unsqueezeTensor(ctx, node, *tensorPtr, axes); - } - nvinfer1::IUnaryLayer* layer = ctx->network()->addUnary(*tensorPtr, op); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); tensorPtr = layer->getOutput(0); - // Squeeze scalar inputs back into a scalar - if (rank == 0) - { - std::vector axes{0}; - tensorPtr = squeezeTensor(ctx, node, *tensorPtr, axes); - } - return {{tensorPtr}}; } @@ -2074,7 +2170,7 @@ NodeImportResult convMultiInput( { layer->setInput(2, *bias_tensor_ptr); } - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); nvinfer1::ITensor* output_tensor_ptr = layer->getOutput(0); if (needToExpandDims) @@ -2113,7 +2209,7 @@ nvinfer1::ITensor* unsqueezeTensor(IImporterContext* ctx, const ::ONNX_NAMESPACE nvinfer1::IShuffleLayer* unsqueezeLayer = addShuffle(ctx, tensor, newDims); if (regLayer) { - ctx->registerLayer(unsqueezeLayer, getNodeName(node)); + ctx->registerLayer(unsqueezeLayer, node); } return unsqueezeLayer->getOutput(0); } @@ -2246,7 +2342,7 @@ nvinfer1::ITensor* addSoftmax(IImporterContext* ctx, const ::ONNX_NAMESPACE::Nod // ONNX softmax is always on second dimension. 
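// Editorial note (not part of the patch): ISoftMaxLayer::setAxes() takes a bitmask in which
// bit i selects dimension i, so `1 << 1` below reduces over dimension 1. In the pre-opset-13
// path the importer has already arranged the data so that the softmax axis is the second
// dimension, which is why this branch can hard-code the mask.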
softMax->setAxes(1 << 1); } - ctx->registerLayer(softMax, node.name()); + ctx->registerLayer(softMax, node); return softMax->getOutput(0); } @@ -2285,16 +2381,16 @@ NodeImportResult addScatterLayer(IImporterContext* ctx, ::ONNX_NAMESPACE::NodePr auto* layer = ctx->network()->addScatter(data, indices, updates, mode); layer->setAxis(axis); - ctx->registerLayer(layer, getNodeName(node)); + ctx->registerLayer(layer, node); return {{layer->getOutput(0)}}; } -//! Helper function to calculate mod(A, B) +//! Helper function to calculate mod(A, B) nvinfer1::IElementWiseLayer* modWithIntegerInputs(IImporterContext* ctx, nvinfer1::ITensor* input0, nvinfer1::ITensor* input1, bool fmod){ using eOp = nvinfer1::ElementWiseOperation; auto divOp = eOp::kFLOOR_DIV; if (fmod) divOp = eOp::kDIV; - + // input0 - (input1 * divOp(input0, input1)) return ctx->network()->addElementWise(*input0, *ctx->network()->addElementWise(*input1, @@ -2334,4 +2430,60 @@ float* convertFP16Data(void* weightValues, nvinfer1::Dims shape, IImporterContex return newWeights; } +std::string filterDocString(std::string const& docString) +{ + auto splitString = [](auto const& docString) { + std::vector lines; + + std::stringstream ss(docString); + std::string line; + while (std::getline(ss, line, '\n')) + { + lines.push_back(line); + } + + return lines; + }; + + std::ostringstream filteredDocStream; + // The doc strings that PyTorch lib generates contain the literal `site-packages` or `dist-packages`. + // We filter such lines out to keep only the doc strings of the user-programmed codes. + std::vector patterns{"site-packages", "dist-packages"}; + std::vector lines = splitString(docString); + for (auto &line: lines) { + bool writeLine = true; + for (auto &pattern : patterns) + { + if (line.find(pattern) != std::string::npos) + { + writeLine = false; + break; + } + } + if (writeLine) + { + // A double-quote substring in a line breaks the JSON format. + // For that reason, we change it to a single-quote substring, if any. 
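// Editorial worked example (not part of the patch; the file names and node name are made up):
// given a PyTorch-generated doc_string such as
//   File "/usr/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 443
//   File "model.py", line 10, in forward
// the site-packages line is dropped and the surviving line has its double quotes replaced by
// single quotes, so processMetadata() further down attaches something like
//   [ONNX Layer: Conv_0 | File 'model.py', line 10, in forward]
// to the TensorRT layer via ILayer::setMetadata().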
+ std::replace(line.begin(), line.end(), '\"', '\''); + filteredDocStream << " | " << line; + } + } + return filteredDocStream.str(); +} + +Status processMetadata(::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::ILayer* layer) +{ + std::string docString = node.doc_string(); + std::string filteredDocString = "[ONNX Layer: " + getNodeName(node); + + if (docString.size() != 0) + { + filteredDocString += filterDocString(docString); + } + filteredDocString += "]"; + + ASSERT((layer != nullptr) && "The layer object does not exist.", ErrorCode::kUNSUPPORTED_NODE); + layer->setMetadata(filteredDocString.c_str()); + return Status::success(); +} } // namespace onnx2trt diff --git a/onnx2trt_utils.hpp b/onnx2trt_utils.hpp index fbb7ba63..7cafa9e9 100644 --- a/onnx2trt_utils.hpp +++ b/onnx2trt_utils.hpp @@ -75,6 +75,7 @@ static std::ostream& operator<<(std::ostream& stream, const nvinfer1::DataType& case nvinfer1::DataType::kUINT8: return stream << "uint8"; case nvinfer1::DataType::kINT32: return stream << "int32"; case nvinfer1::DataType::kBOOL: return stream << "bool"; + case nvinfer1::DataType::kFP8: return stream << "float8"; default: throw std::runtime_error("Unknown dtype"); } } @@ -270,12 +271,16 @@ nvinfer1::ITensor* greaterLessOrEqual(IImporterContext* ctx, const ::ONNX_NAMESP // Helper function to determine if a shape contains dynamic dimensions bool isDynamic(const nvinfer1::Dims& shape); +// Helper function to use optimized 3D instanceNorm plugin +NodeImportResult instanceNormPluginHelper( + IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs); + // Helper fucntion to create an iota fill given a set of dimensions and an axis nvinfer1::ITensor* iota(IImporterContext* ctx, ShapeTensor iotaDims, int32_t axis); // Helper function to load a creator from the registry -nvinfer1::IPluginCreator* importPluginCreator( - const std::string& pluginName, const std::string& pluginVersion, const std::string& pluginNamespace = ""); +nvinfer1::IPluginCreator* importPluginCreator(IImporterContext* ctx, std::string const& pluginName, + std::string const& pluginVersion, std::string const& pluginNamespace = ""); // Helper function to get a plugin from the PluginRegistry std::unique_ptr createPlugin(const std::string& name, @@ -291,6 +296,10 @@ NodeImportResult lstmLegacyImporter( // Helper function to create and fill a Dims object with defined values nvinfer1::Dims makeDims(int nbDims, int val); +// Helper function to create normalization layers for GroupNorm and InstanceNorm +NodeImportResult normalizationHelper( + IImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node, std::vector& inputs); + // Helper function to parse activation values for LSTM nodes std::vector parseLSTMActivationValues(const std::vector& activationTypes, const std::vector& activationValues, bool isAlpha); @@ -455,4 +464,5 @@ float* convertFP16Data(void* weightValues, nvinfer1::Dims shape, IImporterContex // Helper function to validate input types for an ONNX node Status notInvalidType(TensorOrWeights const& input, std::vector const& invalidTypes); +Status processMetadata(::ONNX_NAMESPACE::NodeProto const& node, nvinfer1::ILayer* layer); } // namespace onnx2trt diff --git a/trt_utils.hpp b/trt_utils.hpp index 71cd8f78..fe4af868 100644 --- a/trt_utils.hpp +++ b/trt_utils.hpp @@ -22,7 +22,8 @@ inline int getDtypeSize(nvinfer1::DataType trtDtype) { case nvinfer1::DataType::kFLOAT: return 4; case nvinfer1::DataType::kUINT8: - case nvinfer1::DataType::kINT8: return 1; + case nvinfer1::DataType::kINT8: + 
case nvinfer1::DataType::kFP8: return 1;
     case nvinfer1::DataType::kHALF: return 2;
     case nvinfer1::DataType::kINT32: return 4;
@@ -156,9 +157,9 @@ inline ::ONNX_NAMESPACE::TensorProto_DataType trtDataTypeToONNX(nvinfer1::DataTy
     case nvinfer1::DataType::kINT8: return ::ONNX_NAMESPACE::TensorProto::INT8;
     case nvinfer1::DataType::kBOOL: return ::ONNX_NAMESPACE::TensorProto::BOOL;
     case nvinfer1::DataType::kUINT8: return ::ONNX_NAMESPACE::TensorProto::UINT8;
-    default: return ::ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;
+    case nvinfer1::DataType::kFP8: break;
     }
-    throw std::runtime_error{"Unreachable"};
+    return ::ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;
 }
 } // namespace onnx2trt