From c87f420c5bb9912f6142571f6441b196568aed89 Mon Sep 17 00:00:00 2001 From: Frank Dong Date: Wed, 23 Oct 2024 17:36:32 -0700 Subject: [PATCH] take comments --- include/onnxruntime/core/framework/op_kernel.h | 2 +- include/onnxruntime/core/graph/graph.h | 2 +- onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc | 7 +++++-- onnxruntime/core/framework/session_state.cc | 3 ++- onnxruntime/core/framework/session_state_utils.cc | 2 +- onnxruntime/core/framework/tensorprotoutils.cc | 9 ++++----- onnxruntime/core/framework/tensorprotoutils.h | 2 +- onnxruntime/core/framework/utils.cc | 4 ++-- onnxruntime/core/framework/utils.h | 2 +- onnxruntime/core/graph/graph.cc | 8 ++++---- onnxruntime/core/session/inference_session.cc | 2 +- onnxruntime/test/framework/inference_session_test.cc | 2 +- 12 files changed, 24 insertions(+), 21 deletions(-) diff --git a/include/onnxruntime/core/framework/op_kernel.h b/include/onnxruntime/core/framework/op_kernel.h index 09747aa80148a..0a74e93baa4e5 100644 --- a/include/onnxruntime/core/framework/op_kernel.h +++ b/include/onnxruntime/core/framework/op_kernel.h @@ -79,7 +79,7 @@ class OpKernel { // the allocator tied to the session if the kernel owns the pre-packed buffer or an // allocator shared between sessions if the pre-packed buffer is to be shared across sessions // (i.e.) the kernel does not own the buffer. - // @param save_prepacked_initializers: Set it to true if intend to save prepacked initializers to onnx data file. + // @param save_prepacked_initializers: Set it to true if intend to save prepacked initializers to external data file. 
// @param is_packed: Set it to true if the kernel packed the tensor or to false // The kernel is responsible for keeping the packed data and related metadata if is_packed is true, // and the original initialized constant tensor will be released and not accessible anymore in diff --git a/include/onnxruntime/core/graph/graph.h b/include/onnxruntime/core/graph/graph.h index 14caf97e953b2..69af3c93d7a07 100644 --- a/include/onnxruntime/core/graph/graph.h +++ b/include/onnxruntime/core/graph/graph.h @@ -1150,7 +1150,7 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi // Since one constant initializer could be used by different kernels // and prepacked differently, use an unordered_map to store prepacked - // initializer in format of <[initializer_name], <[kernel_name], [prepacked_initializer]>> + // initializer in format of <[initializer_name], <[node_name], [prepacked_initializer]>> typedef std::unordered_map> PrePackedTensorProtoToSave; #if !defined(ORT_MINIMAL_BUILD) diff --git a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc index 1bcd1b372d19f..7c568a6f7547b 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc +++ b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc @@ -282,13 +282,16 @@ std::optional MatMulNBits::GetPrePackTensor(int input_idx) { // Inorder to cope with this logic, we need to return latest prepacked buffer and only serialize // the latest one. So, we need to always return packed_tensor_ here not only for input_B. 
ORT_UNUSED_PARAMETER(input_idx); - return std::move(packed_tensor_.value()); + return std::move(packed_tensor_); } template Status MatMulNBits::SetPrePackTensor(int input_idx, Tensor& pre_packed_tensor) { if (input_idx == 1) { - packed_b_ = BufferUniquePtr(pre_packed_tensor.MutableDataRaw()); + // pre_packed_tensor is constant initialized tensor and its lifecycle is managed by session_state, + // session_state will release memory from pre_packed_tensor. packed_b_ will not release memory so + // pass empty/default buffer deleter here. + packed_b_ = BufferUniquePtr(pre_packed_tensor.MutableDataRaw(), BufferDeleter()); } return Status::OK(); diff --git a/onnxruntime/core/framework/session_state.cc b/onnxruntime/core/framework/session_state.cc index 40a7819ade0af..6d4ef0af2c488 100644 --- a/onnxruntime/core/framework/session_state.cc +++ b/onnxruntime/core/framework/session_state.cc @@ -437,7 +437,8 @@ Status SessionState::PrepackConstantInitializedTensors(InlinedHashMapSetPrePackTensor(input_idx, *(constant_initialized_tensors[ort_value_idx].GetMutable()))); + ORT_THROW_IF_ERROR(kernel->SetPrePackTensor(input_idx, + *(constant_initialized_tensors[ort_value_idx].GetMutable()))); } // Caching pre-packed weights is limited to shared initializers associated with the CPU EP for now else if (is_shared_initializer && should_cache_prepacked_weights_for_shared_initializers && diff --git a/onnxruntime/core/framework/session_state_utils.cc b/onnxruntime/core/framework/session_state_utils.cc index 39abdac5181e6..3424f40e79c01 100644 --- a/onnxruntime/core/framework/session_state_utils.cc +++ b/onnxruntime/core/framework/session_state_utils.cc @@ -79,7 +79,7 @@ static inline common::Status ExtDataTensorProtoToTensor(const Env& env, SafeInt ext_data_len = 0; ORT_RETURN_IF_ERROR(utils::GetExtDataFromTensorProto(env, proto_path.c_str(), tensor_proto, ext_data_buf, ext_data_len, ext_data_deleter, - pre_packed_initializers_name_set, buffered_tensor)); + 
&pre_packed_initializers_name_set, buffered_tensor)); // NB: creating a do-nothing allocator per tensor is wasteful; can perhaps be // avoided if the Tensor class implements the do-nothing behavior when given a diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc index d07a68ad32669..83eec9632054c 100644 --- a/onnxruntime/core/framework/tensorprotoutils.cc +++ b/onnxruntime/core/framework/tensorprotoutils.cc @@ -1000,7 +1000,7 @@ static Status GetFileContent(const Env& env, const std::filesystem::path& file_p Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& model_path, const ONNX_NAMESPACE::TensorProto& tensor_proto, void*& ext_data_buf, SafeInt& ext_data_len, OrtCallback& ext_data_deleter, - SessionState::PrePackInitializers::PrePackedTensorNamesReadFromFile& pre_packed_initializers_name_set, Tensor* buffered_tensor) { + SessionState::PrePackInitializers::PrePackedTensorNamesReadFromFile* pre_packed_initializers_name_set, Tensor* buffered_tensor) { ORT_ENFORCE(utils::HasExternalData(tensor_proto)); std::basic_string tensor_proto_dir; if (!model_path.empty()) { @@ -1013,8 +1013,8 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo ORT_RETURN_IF_ERROR( GetExternalDataInfo(tensor_proto, tensor_proto_dir, external_data_file_path, file_offset, raw_data_safe_len, pre_packed)); - if (pre_packed) { - pre_packed_initializers_name_set.insert(tensor_proto.name()); + if (pre_packed && pre_packed_initializers_name_set != nullptr) { + (*pre_packed_initializers_name_set).insert(tensor_proto.name()); } if (external_data_file_path == onnxruntime::utils::kTensorProtoMemoryAddressTag) { @@ -1123,10 +1123,9 @@ Status TensorProtoToTensor(const Env& env, const std::filesystem::path& model_pa SafeInt raw_data_len = 0; AutoDelete deleter_for_file_data; OrtCallback& d = deleter_for_file_data.d; - SessionState::PrePackInitializers::PrePackedTensorNamesReadFromFile 
pre_packed_initializers_name_set; if (utils::HasExternalData(tensor_proto)) { - ORT_RETURN_IF_ERROR(GetExtDataFromTensorProto(env, model_path, tensor_proto, raw_data, raw_data_len, d, pre_packed_initializers_name_set)); + ORT_RETURN_IF_ERROR(GetExtDataFromTensorProto(env, model_path, tensor_proto, raw_data, raw_data_len, d, nullptr)); } else if (utils::HasRawData(tensor_proto)) { raw_data = const_cast(tensor_proto.raw_data().data()); // TODO The line above has const-correctness issues. Below is a possible fix which copies the tensor_proto data diff --git a/onnxruntime/core/framework/tensorprotoutils.h b/onnxruntime/core/framework/tensorprotoutils.h index 1f2288f84ebac..3a3bd1a881fce 100644 --- a/onnxruntime/core/framework/tensorprotoutils.h +++ b/onnxruntime/core/framework/tensorprotoutils.h @@ -159,7 +159,7 @@ common::Status GetExtDataFromTensorProto(const Env& env, const std::filesystem:: const ONNX_NAMESPACE::TensorProto& tensor_proto, void*& ext_data_buf, SafeInt& ext_data_len, OrtCallback& ext_data_deleter, - SessionState::PrePackInitializers::PrePackedTensorNamesReadFromFile& pre_packed_initializers_name_set, + SessionState::PrePackInitializers::PrePackedTensorNamesReadFromFile* pre_packed_initializers_name_set, Tensor* buffered_tensor = nullptr); // Given a tensor proto with external data obtain a tensor using the specified custom external data loader. 
diff --git a/onnxruntime/core/framework/utils.cc b/onnxruntime/core/framework/utils.cc index 2a06cbd398638..5402345447706 100644 --- a/onnxruntime/core/framework/utils.cc +++ b/onnxruntime/core/framework/utils.cc @@ -1064,10 +1064,10 @@ bool IsOutputOnCpu(const Node& node, const KernelCreateInfo* p_kci, size_t index return false; } -std::string GetPrepackedInitializerName(const std::string& initializer_name, const std::string& kernel_name) { +std::string GetPrepackedInitializerName(const std::string& initializer_name, const std::string& node_name) { const std::string seperator = ":"; - return initializer_name + seperator + kernel_name; + return initializer_name + seperator + node_name; } } // namespace utils diff --git a/onnxruntime/core/framework/utils.h b/onnxruntime/core/framework/utils.h index e27fb7a6d833a..db38ef1675595 100644 --- a/onnxruntime/core/framework/utils.h +++ b/onnxruntime/core/framework/utils.h @@ -234,7 +234,7 @@ constexpr ONNXTensorElementDataType GetONNXTensorElementDataType() { int32_t ONNXTensorElementDataTypeToProtoTensorType(ONNXTensorElementDataType); -std::string GetPrepackedInitializerName(const std::string& initializer_name, const std::string& kernel_name); +std::string GetPrepackedInitializerName(const std::string& initializer_name, const std::string& node_name); #ifdef ENABLE_TRAINING common::Status VerifyInputTensorsAllocatedContiguously(OpKernelContext* context); diff --git a/onnxruntime/core/graph/graph.cc b/onnxruntime/core/graph/graph.cc index 94ae6caf2ae23..3f50841f50913 100644 --- a/onnxruntime/core/graph/graph.cc +++ b/onnxruntime/core/graph/graph.cc @@ -4175,19 +4175,19 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std InlinedVector pre_packed_initializers_tensor_proto; // If this initializer has been prepacked, saved prepacked external initializer instead of original one. 
// Since one initializer could be used by multiple kernels and been prepacked differently, - Save each prepacked initializers seperately, chagne the initializer name to [initializer_name]:[kernel_name] + // Save each prepacked initializer separately, change the initializer name to [initializer_name]:[node_name] // to avoid conflict. Change the node input name accordingly. // IT could potentially make the ONNX data file larger since we store multiple prepacked initializers into disk // but this could be rare case. if (save_prepacked_constant_initializers && pre_packed_initializers.count(initializer.name())) { for (const auto& item : pre_packed_initializers[initializer.name()]) { - auto& kernel_name = item.first; - std::string prepacked_initializer_name = utils::GetPrepackedInitializerName(initializer.name(), kernel_name); + auto& node_name = item.first; + std::string prepacked_initializer_name = utils::GetPrepackedInitializerName(initializer.name(), node_name); pre_packed_initializers_tensor_proto.push_back(item.second); use_pre_packed_initializer = true; for (auto& node : *result.mutable_node()) { - if (node.name() == kernel_name) { + if (node.name() == node_name) { int input_index = 0; for (const auto& input : node.input()) { if (input == initializer.name()) { diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index eef760bfe026d..e6aafaa1f2283 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -2028,7 +2028,6 @@ common::Status InferenceSession::Initialize() { } SessionState::PrePackInitializers pre_packed_initializers; - Graph::PrePackedTensorProtoToSave pre_packed_initializers_tensor_proto; ORT_RETURN_IF_ERROR_SESSIONID_( session_state_->FinalizeSessionState(model_location_, kernel_registry_manager_, // need to keep the initializers if saving the optimized model @@ -2070,6 +2069,7 @@ 
align_info.align_offset = true; bool save_prepacked_constant_initializers = session_options_.config_options.GetConfigOrDefault(kOrtSessionOptionsSavePrePackedConstantInitializers, "0") == "1" ? true : false; + Graph::PrePackedTensorProtoToSave pre_packed_initializers_tensor_proto; if (save_prepacked_constant_initializers) { LOGS(*session_logger_, WARNING) << "Serialize prepacked initializers option has been turn on." << "Use this option only when run model inference on PC with CPU." diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index daab52443c71f..99d8aa101ea2e 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -504,7 +504,7 @@ TEST(InferenceSessionTests, TestModelSerialization) { #if !ENABLE_TRAINING && !defined(USE_CUDA) && !defined(__wasm__) && !defined(USE_DNNL) && !defined(USE_QNN) && !defined(__ANDROID__) && !defined(USE_COREML) // MLAS dispatcher used in matmul_nbits kernels here is 64 bit only #if defined(__amd64__) || defined(_M_AMD64) || defined(__aarch64__) || defined(_M_ARM64) -TEST(InferenceSessionTests, TestPrepackSerialization) { +TEST(InferenceSessionTests, TestPrePackSerialization) { SessionOptions so; std::string model_name = "model_with_matmul_nbits";