Commit

take comments

frank-dong-ms committed Oct 24, 2024
1 parent ead9032 commit c87f420
Showing 12 changed files with 24 additions and 21 deletions.
2 changes: 1 addition & 1 deletion include/onnxruntime/core/framework/op_kernel.h
@@ -79,7 +79,7 @@ class OpKernel {
// the allocator tied to the session if the kernel owns the pre-packed buffer or an
// allocator shared between sessions if the pre-packed buffer is to be shared across sessions
// (i.e., the kernel does not own the buffer).
// @param save_prepacked_initializers: Set it to true if intend to save prepacked initializers to onnx data file.
// @param save_prepacked_initializers: Set it to true if you intend to save prepacked initializers to the external data file.
// @param is_packed: Set it to true if the kernel packed the tensor or to false
// The kernel is responsible for keeping the packed data and related metadata if is_packed is true,
// and the original initialized constant tensor will be released and not accessible anymore in
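For context, a minimal sketch of a kernel honoring this contract (hypothetical MyGemm kernel and packed_w_ member; only the parameters documented above are assumed):

```cpp
#include <cstring>  // std::memcpy

// Sketch only: a hypothetical kernel's PrePack honoring the documented contract.
Status MyGemm::PrePack(const Tensor& tensor, int input_idx, AllocatorPtr alloc,
                       bool save_prepacked_initializers,
                       /*out*/ bool& is_packed,
                       /*out*/ PrePackedWeights* /*prepacked_weights*/) {
  is_packed = false;
  if (input_idx != 1) return Status::OK();  // only the weight input gets packed

  // `alloc` is session-owned or shared across sessions, per the comment above.
  packed_w_ = IAllocator::MakeUniquePtr<void>(alloc, tensor.SizeInBytes());
  std::memcpy(packed_w_.get(), tensor.DataRaw(), tensor.SizeInBytes());

  if (save_prepacked_initializers) {
    // Also keep a TensorProto copy of the packed buffer so the session can
    // serialize it to the external data file.
  }

  is_packed = true;  // the original initializer may now be released
  return Status::OK();
}
```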
2 changes: 1 addition & 1 deletion include/onnxruntime/core/graph/graph.h
@@ -1150,7 +1150,7 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi

// Since one constant initializer could be used by different kernels
// and prepacked differently, use an unordered_map to store prepacked
// initializer in format of <[initializer_name], <[kernel_name], [prepacked_initializer]>>
// initializers in the format <[initializer_name], <[node_name], [prepacked_initializer]>>
typedef std::unordered_map<std::string, std::unordered_map<std::string, ONNX_NAMESPACE::TensorProto>> PrePackedTensorProtoToSave;

#if !defined(ORT_MINIMAL_BUILD)
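To make the map's shape concrete, a small sketch (hypothetical initializer and node names):

```cpp
// initializer_name -> (node_name -> prepacked TensorProto)
Graph::PrePackedTensorProtoToSave to_save;
ONNX_NAMESPACE::TensorProto packed;  // produced by a kernel's prepack path
packed.set_name("fc1.weight:/model/fc1/MatMul");
to_save["fc1.weight"]["/model/fc1/MatMul"] = packed;
```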
7 changes: 5 additions & 2 deletions onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc
@@ -282,13 +282,16 @@ std::optional<Tensor> MatMulNBits<T1>::GetPrePackTensor(int input_idx) {
// In order to cope with this logic, we need to return the latest prepacked buffer and serialize only
// the latest one. So we always return packed_tensor_ here, not just for input_B.
ORT_UNUSED_PARAMETER(input_idx);
return std::move(packed_tensor_.value());
return std::move(packed_tensor_);
}

template <typename T1>
Status MatMulNBits<T1>::SetPrePackTensor(int input_idx, Tensor& pre_packed_tensor) {
if (input_idx == 1) {
packed_b_ = BufferUniquePtr(pre_packed_tensor.MutableDataRaw());
// pre_packed_tensor is a constant initialized tensor whose lifecycle is managed by session_state;
// session_state will release its memory. packed_b_ must not release the memory, so we
// pass an empty/default buffer deleter here.
packed_b_ = BufferUniquePtr(pre_packed_tensor.MutableDataRaw(), BufferDeleter());
}

return Status::OK();
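The ownership rule behind this change, sketched (assuming a default-constructed BufferDeleter is a no-op, as the comment implies):

```cpp
// Owning: the allocator-backed deleter frees the buffer on destruction.
BufferUniquePtr owning(alloc->Alloc(num_bytes), BufferDeleter(alloc));

// Non-owning: the default deleter does nothing; session_state keeps ownership
// of the prepacked tensor's memory, which is exactly what SetPrePackTensor needs.
BufferUniquePtr non_owning(pre_packed_tensor.MutableDataRaw(), BufferDeleter());
```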
3 changes: 2 additions & 1 deletion onnxruntime/core/framework/session_state.cc
@@ -437,7 +437,8 @@ Status SessionState::PrepackConstantInitializedTensors(InlinedHashMap<std::strin
// If the prepacked weights were already read from the ONNX external data file (this happens when ORT
// reads a data file with serialized prepacked weights), we only need to set the prepacked weights on the kernel once.
is_kernel_prepacked = true;
ORT_THROW_IF_ERROR(kernel->SetPrePackTensor(input_idx, *(constant_initialized_tensors[ort_value_idx].GetMutable<Tensor>())));
ORT_THROW_IF_ERROR(kernel->SetPrePackTensor(input_idx,
*(constant_initialized_tensors[ort_value_idx].GetMutable<Tensor>())));
}
// Caching pre-packed weights is limited to shared initializers associated with the CPU EP for now
else if (is_shared_initializer && should_cache_prepacked_weights_for_shared_initializers &&
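Condensed, the intended flow is roughly the following (a sketch with simplified names, not the exact surrounding code):

```cpp
// Prepacked weights already deserialized from the external data file are handed
// to the kernel directly; everything else goes through the normal PrePack path.
if (prepacked_names_read_from_file.count(initializer_name) > 0) {
  is_kernel_prepacked = true;  // set once; the kernel is not re-packed later
  ORT_THROW_IF_ERROR(kernel->SetPrePackTensor(input_idx, tensor));
} else {
  ORT_THROW_IF_ERROR(kernel->PrePack(tensor, input_idx, alloc,
                                     save_prepacked_initializers,
                                     is_packed, prepacked_weights));
}
```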
2 changes: 1 addition & 1 deletion onnxruntime/core/framework/session_state_utils.cc
@@ -79,7 +79,7 @@ static inline common::Status ExtDataTensorProtoToTensor(const Env& env,
SafeInt<size_t> ext_data_len = 0;
ORT_RETURN_IF_ERROR(utils::GetExtDataFromTensorProto(env, proto_path.c_str(), tensor_proto,
ext_data_buf, ext_data_len, ext_data_deleter,
pre_packed_initializers_name_set, buffered_tensor));
&pre_packed_initializers_name_set, buffered_tensor));

// NB: creating a do-nothing allocator per tensor is wasteful; can perhaps be
// avoided if the Tensor class implements the do-nothing behavior when given a
9 changes: 4 additions & 5 deletions onnxruntime/core/framework/tensorprotoutils.cc
@@ -1000,7 +1000,7 @@ static Status GetFileContent(const Env& env, const std::filesystem::path& file_p
Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& model_path,
const ONNX_NAMESPACE::TensorProto& tensor_proto, void*& ext_data_buf,
SafeInt<size_t>& ext_data_len, OrtCallback& ext_data_deleter,
SessionState::PrePackInitializers::PrePackedTensorNamesReadFromFile& pre_packed_initializers_name_set, Tensor* buffered_tensor) {
SessionState::PrePackInitializers::PrePackedTensorNamesReadFromFile* pre_packed_initializers_name_set, Tensor* buffered_tensor) {
ORT_ENFORCE(utils::HasExternalData(tensor_proto));
std::basic_string<ORTCHAR_T> tensor_proto_dir;
if (!model_path.empty()) {
@@ -1013,8 +1013,8 @@ Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::path& mo
ORT_RETURN_IF_ERROR(
GetExternalDataInfo(tensor_proto, tensor_proto_dir, external_data_file_path, file_offset, raw_data_safe_len, pre_packed));

if (pre_packed) {
pre_packed_initializers_name_set.insert(tensor_proto.name());
if (pre_packed && pre_packed_initializers_name_set != nullptr) {
pre_packed_initializers_name_set->insert(tensor_proto.name());
}

if (external_data_file_path == onnxruntime::utils::kTensorProtoMemoryAddressTag) {
@@ -1123,10 +1123,9 @@ Status TensorProtoToTensor(const Env& env, const std::filesystem::path& model_pa
SafeInt<size_t> raw_data_len = 0;
AutoDelete deleter_for_file_data;
OrtCallback& d = deleter_for_file_data.d;
SessionState::PrePackInitializers::PrePackedTensorNamesReadFromFile pre_packed_initializers_name_set;

if (utils::HasExternalData(tensor_proto)) {
ORT_RETURN_IF_ERROR(GetExtDataFromTensorProto(env, model_path, tensor_proto, raw_data, raw_data_len, d, pre_packed_initializers_name_set));
ORT_RETURN_IF_ERROR(GetExtDataFromTensorProto(env, model_path, tensor_proto, raw_data, raw_data_len, d, nullptr));
} else if (utils::HasRawData(tensor_proto)) {
raw_data = const_cast<char*>(tensor_proto.raw_data().data());
// TODO The line above has const-correctness issues. Below is a possible fix which copies the tensor_proto data
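With the parameter now a pointer, callers that have no use for prepacked-name tracking can pass nullptr, as the TensorProtoToTensor call above does. A sketch of such a call:

```cpp
void* ext_data_buf = nullptr;
SafeInt<size_t> ext_data_len = 0;
OrtCallback ext_data_deleter{};
ORT_RETURN_IF_ERROR(utils::GetExtDataFromTensorProto(
    env, model_path, tensor_proto, ext_data_buf, ext_data_len, ext_data_deleter,
    /*pre_packed_initializers_name_set*/ nullptr));  // tracking not needed here
```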
2 changes: 1 addition & 1 deletion onnxruntime/core/framework/tensorprotoutils.h
@@ -159,7 +159,7 @@ common::Status GetExtDataFromTensorProto(const Env& env, const std::filesystem::
const ONNX_NAMESPACE::TensorProto& tensor_proto,
void*& ext_data_buf, SafeInt<size_t>& ext_data_len,
OrtCallback& ext_data_deleter,
SessionState::PrePackInitializers::PrePackedTensorNamesReadFromFile& pre_packed_initializers_name_set,
SessionState::PrePackInitializers::PrePackedTensorNamesReadFromFile* pre_packed_initializers_name_set,
Tensor* buffered_tensor = nullptr);

// Given a tensor proto with external data obtain a tensor using the specified custom external data loader.
4 changes: 2 additions & 2 deletions onnxruntime/core/framework/utils.cc
@@ -1064,10 +1064,10 @@ bool IsOutputOnCpu(const Node& node, const KernelCreateInfo* p_kci, size_t index
return false;
}

std::string GetPrepackedInitializerName(const std::string& initializer_name, const std::string& kernel_name) {
std::string GetPrepackedInitializerName(const std::string& initializer_name, const std::string& node_name) {
const std::string separator = ":";

return initializer_name + separator + kernel_name;
return initializer_name + separator + node_name;
}

} // namespace utils
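Usage sketch (hypothetical initializer and node names):

```cpp
std::string name = utils::GetPrepackedInitializerName("fc1.weight", "/model/fc1/MatMul");
// name == "fc1.weight:/model/fc1/MatMul"
```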
2 changes: 1 addition & 1 deletion onnxruntime/core/framework/utils.h
@@ -234,7 +234,7 @@ constexpr ONNXTensorElementDataType GetONNXTensorElementDataType<UInt4x2>() {

int32_t ONNXTensorElementDataTypeToProtoTensorType(ONNXTensorElementDataType);

std::string GetPrepackedInitializerName(const std::string& initializer_name, const std::string& kernel_name);
std::string GetPrepackedInitializerName(const std::string& initializer_name, const std::string& node_name);

#ifdef ENABLE_TRAINING
common::Status VerifyInputTensorsAllocatedContiguously(OpKernelContext* context);
8 changes: 4 additions & 4 deletions onnxruntime/core/graph/graph.cc
@@ -4175,19 +4175,19 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
InlinedVector<TensorProto> pre_packed_initializers_tensor_proto;
// If this initializer has been prepacked, save the prepacked external initializer instead of the original one.
// Since one initializer could be used by multiple kernels and prepacked differently,
// save each prepacked initializer separately, changing the initializer name to [initializer_name]:[kernel_name]
// save each prepacked initializer separately, changing the initializer name to [initializer_name]:[node_name]
// to avoid conflicts. Change the node input name accordingly.
// It could potentially make the ONNX data file larger, since we store multiple prepacked initializers on disk,
// but this should be a rare case.
if (save_prepacked_constant_initializers && pre_packed_initializers.count(initializer.name())) {
for (const auto& item : pre_packed_initializers[initializer.name()]) {
auto& kernel_name = item.first;
std::string prepacked_initializer_name = utils::GetPrepackedInitializerName(initializer.name(), kernel_name);
auto& node_name = item.first;
std::string prepacked_initializer_name = utils::GetPrepackedInitializerName(initializer.name(), node_name);
pre_packed_initializers_tensor_proto.push_back(item.second);
use_pre_packed_initializer = true;

for (auto& node : *result.mutable_node()) {
if (node.name() == kernel_name) {
if (node.name() == node_name) {
int input_index = 0;
for (const auto& input : node.input()) {
if (input == initializer.name()) {
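To illustrate with hypothetical names: if initializer W is prepacked by both node_a and node_b, the serialized model contains W:node_a and W:node_b in place of W, and each node's matching input is rewritten to reference its own renamed copy.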
2 changes: 1 addition & 1 deletion onnxruntime/core/session/inference_session.cc
@@ -2028,7 +2028,6 @@ common::Status InferenceSession::Initialize() {
}

SessionState::PrePackInitializers pre_packed_initializers;
Graph::PrePackedTensorProtoToSave pre_packed_initializers_tensor_proto;
ORT_RETURN_IF_ERROR_SESSIONID_(
session_state_->FinalizeSessionState(model_location_, kernel_registry_manager_,
// need to keep the initializers if saving the optimized model
@@ -2070,6 +2069,7 @@ common::Status InferenceSession::Initialize() {
align_info.align_offset = true;
bool save_prepacked_constant_initializers =
session_options_.config_options.GetConfigOrDefault(kOrtSessionOptionsSavePrePackedConstantInitializers, "0") == "1";
Graph::PrePackedTensorProtoToSave pre_packed_initializers_tensor_proto;
if (save_prepacked_constant_initializers) {
LOGS(*session_logger_, WARNING) << "The option to serialize prepacked initializers has been turned on. "
<< "Use this option only when running model inference on a PC with a CPU. "
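From user code, the option would be enabled roughly as follows (a sketch; the key constant is the one referenced in this diff, declared in onnxruntime_session_options_config_keys.h):

```cpp
#include "onnxruntime_cxx_api.h"
#include "onnxruntime_session_options_config_keys.h"

Ort::Env env;
Ort::SessionOptions so;
so.AddConfigEntry(kOrtSessionOptionsSavePrePackedConstantInitializers, "1");
// The prepacked model (with external data) is written out here.
so.SetOptimizedModelFilePath(ORT_TSTR("model_prepacked.onnx"));
Ort::Session session(env, ORT_TSTR("model.onnx"), so);
```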
2 changes: 1 addition & 1 deletion onnxruntime/test/framework/inference_session_test.cc
@@ -504,7 +504,7 @@ TEST(InferenceSessionTests, TestModelSerialization) {
#if !ENABLE_TRAINING && !defined(USE_CUDA) && !defined(__wasm__) && !defined(USE_DNNL) && !defined(USE_QNN) && !defined(__ANDROID__) && !defined(USE_COREML)
// MLAS dispatcher used in matmul_nbits kernels here is 64 bit only
#if defined(__amd64__) || defined(_M_AMD64) || defined(__aarch64__) || defined(_M_ARM64)
TEST(InferenceSessionTests, TestPrepackSerialization) {
TEST(InferenceSessionTests, TestPrePackSerialization) {
SessionOptions so;
std::string model_name = "model_with_matmul_nbits";

