Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enable large initializer offset align for save external data in ORT #21604

Merged
merged 15 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion include/onnxruntime/core/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -1139,16 +1139,48 @@
const ONNX_NAMESPACE::GraphProto& ToGraphProto();
ONNX_NAMESPACE::GraphProto ToGraphProto() const;

// Options to align external initializer offsets.
// For models running on CPU, ORT will try to use mmap to load external initializers.
// To use mmap, each external initializer needs to be offset aligned.
// ORT saves external initializers into a single data file; each initializer is accessed with an
// offset (start position of the initializer) and a length (byte length of the initializer) within the data file.
// To use mmap, each offset needs to be aligned, which means the offset needs to be divisible by the
// allocation granularity (64KB for Windows and 4KB for other OSes).
// With align_offset set to true, ORT will align the offset of large initializers when
// saving an ONNX model with an external data file.
struct OffsetAlignmentInfo {
// Offset will always be page aligned and allocation granularity aligned for mmap support.
// This is done by padding previous tensor data with zeros keeping same length.
frank-dong-ms marked this conversation as resolved.
Show resolved Hide resolved
bool align_offset = false;
// Alignment threshold for the size of the tensor data, in bytes.
// Having a low threshold will waste file space on padding for small initializers.
// A tensor is force aligned only when its data size is > align_threshold.
// Defaults to 1MB.
frank-dong-ms marked this conversation as resolved.
Show resolved Hide resolved
int64_t align_threshold = 1048576;
// The allocation Granularity for mmap() support.
// Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
int64_t allocation_granularity = 65536;
};

/** Gets the GraphProto representation of this Graph
@param external_file_path File path of the binary file to use for initializers.
@param model_file_path path of the model file.
@param initializer_size_threshold initializers larger than or equal to this threshold (in bytes) are saved
in the external file. Initializers smaller than this threshold are included in the onnx file.
@param align_info offset alignment info.
@returns GraphProto serialization of the graph.
*/
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
const std::filesystem::path& model_file_path,
size_t initializer_size_threshold) const;
size_t initializer_size_threshold,
const OffsetAlignmentInfo& align_info) const;

ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
const std::filesystem::path& model_file_path,
size_t initializer_size_threshold) const {
OffsetAlignmentInfo default_options;
return ToGraphProtoWithExternalInitializers(external_file_path, model_file_path, initializer_size_threshold, default_options);

Check warning on line 1182 in include/onnxruntime/core/graph/graph.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: include/onnxruntime/core/graph/graph.h:1182: Lines should be <= 120 characters long [whitespace/line_length] [2]
}

/** Gets the ISchemaRegistry instances being used with this Graph. */
IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const;
Expand Down
24 changes: 23 additions & 1 deletion onnxruntime/core/graph/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4021,7 +4021,8 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {

ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
const std::filesystem::path& model_file_path,
size_t initializer_size_threshold) const {
size_t initializer_size_threshold,
const OffsetAlignmentInfo& align_info) const {
GraphProto result;
ToGraphProtoInternal(result);
ORT_ENFORCE(external_file_path.is_relative());
Expand Down Expand Up @@ -4059,6 +4060,27 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std
continue;
}

// Update external_offset for alignment.
// Padding must be written before the actual tensor data because the offset is aligned at the beginning of
// large tensors (the offset needs to be page aligned and allocation granularity aligned), like below:
// \242\2557\256\023.\031&0000000000000000\332)k+\253\246\342\246(&\006!\347\232\374\236\325\026\032+\36XXXX
// |<---small tensor---->|<---padding--->|<------------------large tensor----------------------------->|
// Only tensors whose byte size is strictly greater than align_threshold are aligned.
if (align_info.align_offset && static_cast<int64_t>(tensor_bytes_size) > align_info.align_threshold) {
  // Align to the larger of the 4096-byte page size or the configured allocation granularity.
  const int64_t alignment_factor = std::max(static_cast<int64_t>(4096), align_info.allocation_granularity);

  // Round external_offset up to the next multiple of alignment_factor.
  // Plain integer arithmetic is used: integer division already truncates, so no std::floor (and no
  // round trip through double) is needed.
  // NOTE(review): assumes external_offset is an integer type declared earlier in this function - confirm.
  const int64_t new_external_offset =
      ((external_offset + alignment_factor - 1) / alignment_factor) * alignment_factor;

  // Pad the gap up to the aligned offset. The padding bytes are the ASCII character '0'
  // (as shown in the diagram above), not NUL bytes.
  for (int64_t index = external_offset; index != new_external_offset; ++index) {
    external_stream << '0';
  }

  external_offset = new_external_offset;
}

for (size_t index = 0; index != tensor_bytes_size; ++index) {
external_stream << raw_data[index];
}
Expand Down
24 changes: 16 additions & 8 deletions onnxruntime/core/graph/model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -383,12 +383,14 @@ ModelProto Model::ToProto() const {

ModelProto Model::ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
const std::filesystem::path& file_path,
size_t initializer_size_threshold) const {
size_t initializer_size_threshold,
const Graph::OffsetAlignmentInfo& align_info) const {
ModelProto result(model_proto_);
const auto& graph = *graph_;
*(result.mutable_graph()) = graph.ToGraphProtoWithExternalInitializers(external_file_name,
file_path,
initializer_size_threshold);
initializer_size_threshold,
align_info);
return result;
}

Expand Down Expand Up @@ -605,14 +607,16 @@ template <typename T>
static Status SaveModelWithExternalInitializers(Model& model,
const T& file_path,
const std::filesystem::path& external_file_name,
size_t initializer_size_threshold) {
size_t initializer_size_threshold,
const Graph::OffsetAlignmentInfo& align_info) {
int fd = 0;
Status status = Env::Default().FileOpenWr(file_path, fd);
ORT_RETURN_IF_ERROR(status);

ORT_TRY {
status = Model::SaveWithExternalInitializers(model, fd, file_path, external_file_name,
initializer_size_threshold);
initializer_size_threshold,
align_info);
}
ORT_CATCH(const std::exception& ex) {
ORT_HANDLE_EXCEPTION([&]() {
Expand Down Expand Up @@ -642,8 +646,10 @@ Status Model::Load(const PathString& file_path, std::shared_ptr<Model>& p_model,

Status Model::SaveWithExternalInitializers(Model& model, const std::filesystem::path& file_path,
const std::filesystem::path& external_file_name,
size_t initializer_size_threshold) {
return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold);
size_t initializer_size_threshold,
const Graph::OffsetAlignmentInfo& align_info) {
return SaveModelWithExternalInitializers(model, file_path, external_file_name, initializer_size_threshold,
align_info);
}

Status Model::LoadFromBytes(int count, const void* p_bytes, /*out*/ ONNX_NAMESPACE::ModelProto& model_proto) {
Expand Down Expand Up @@ -759,15 +765,17 @@ Status Model::SaveWithExternalInitializers(Model& model,
int fd,
const std::filesystem::path& file_path,
const std::filesystem::path& external_file_name,
size_t initializer_size_threshold) {
size_t initializer_size_threshold,
const Graph::OffsetAlignmentInfo& align_info) {
if (fd < 0) {
return Status(ONNXRUNTIME, INVALID_ARGUMENT, "<fd> is less than 0.");
}

ORT_RETURN_IF_ERROR(model.MainGraph().Resolve());

auto model_proto = model.ToGraphProtoWithExternalInitializers(external_file_name, file_path,
initializer_size_threshold);
initializer_size_threshold,
align_info);
google::protobuf::io::FileOutputStream output(fd);
const bool result = model_proto.SerializeToZeroCopyStream(&output) && output.Flush();
if (result) {
Expand Down
35 changes: 32 additions & 3 deletions onnxruntime/core/graph/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,25 +187,54 @@
// Get model's serialization proto data.
// Save initializer larger than the given threshold (in bytes) into an external binary file
// with the given name. This function is useful to avoid hitting the size limit of protobuf files.
// initializer offset could be page aligned and allocation granularity aligned for mmap support.
ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
const std::filesystem::path& file_path,
size_t initializer_size_threshold) const;
size_t initializer_size_threshold,
const Graph::OffsetAlignmentInfo& align_info) const;

ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_name,
const std::filesystem::path& file_path,
size_t initializer_size_threshold) const {
Graph::OffsetAlignmentInfo default_align_info;
return ToGraphProtoWithExternalInitializers(external_file_name, file_path, initializer_size_threshold, default_align_info);

Check warning on line 200 in onnxruntime/core/graph/model.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/core/graph/model.h:200: Lines should be <= 120 characters long [whitespace/line_length] [2]
}

static common::Status Save(Model& model, const PathString& file_path);

static common::Status Save(Model& model, int fd);

// Save the model to file using an external file for initializers larger than the given threshold (in bytes).
// Initializer offset could be page aligned and allocation granularity aligned for mmap support.
static common::Status SaveWithExternalInitializers(Model& model,
const std::filesystem::path& file_path,
const std::filesystem::path& external_file_path,
size_t initializer_size_threshold,
const Graph::OffsetAlignmentInfo& align_info);

static common::Status SaveWithExternalInitializers(Model& model,
const std::filesystem::path& file_path,
const std::filesystem::path& external_file_path,
size_t initializer_size_threshold) {
Graph::OffsetAlignmentInfo default_align_info;
return SaveWithExternalInitializers(model, file_path, external_file_path, initializer_size_threshold, default_align_info);

Check warning on line 220 in onnxruntime/core/graph/model.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/core/graph/model.h:220: Lines should be <= 120 characters long [whitespace/line_length] [2]
}

static common::Status SaveWithExternalInitializers(Model& model,
int fd,
const std::filesystem::path& file_path,
const std::filesystem::path& external_file_path,
size_t initializer_size_threshold);
size_t initializer_size_threshold,
const Graph::OffsetAlignmentInfo& align_info);

static common::Status SaveWithExternalInitializers(Model& model,
int fd,
const std::filesystem::path& file_path,
const std::filesystem::path& external_file_path,
size_t initializer_size_threshold);
size_t initializer_size_threshold) {
Graph::OffsetAlignmentInfo default_align_info;
return SaveWithExternalInitializers(model, fd, file_path, external_file_path, initializer_size_threshold, default_align_info);

Check warning on line 236 in onnxruntime/core/graph/model.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/core/graph/model.h:236: Lines should be <= 120 characters long [whitespace/line_length] [2]
}

static common::Status Load(std::istream& model_istream, ONNX_NAMESPACE::ModelProto* p_model_proto);

Expand Down
5 changes: 4 additions & 1 deletion onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2054,10 +2054,13 @@
const size_t optimized_model_external_initializers_min_size_in_bytes =
ParseStringWithClassicLocale<size_t>(session_options_.config_options.GetConfigOrDefault(
kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes, "1024"));
Graph::OffsetAlignmentInfo align_info;
align_info.align_offset = true;
ORT_RETURN_IF_ERROR_SESSIONID_(Model::SaveWithExternalInitializers(*model_,
session_options_.optimized_model_filepath,
optimized_model_external_initializers_file_name,
optimized_model_external_initializers_min_size_in_bytes));
optimized_model_external_initializers_min_size_in_bytes,

Check warning on line 2062 in onnxruntime/core/session/inference_session.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Lines should be <= 120 characters long [whitespace/line_length] [2] Raw Output: onnxruntime/core/session/inference_session.cc:2062: Lines should be <= 120 characters long [whitespace/line_length] [2]
align_info));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,
const std::filesystem::path& input_external_init_file,
const std::filesystem::path& output_onnx,
const std::filesystem::path& output_external_init_file,
size_t initializer_size_threshold) {
size_t initializer_size_threshold,
const Graph::OffsetAlignmentInfo& align_info) {
auto logger = DefaultLoggingManager().CreateLogger("LoadSaveAndCompareModel");
std::shared_ptr<Model> model;
ORT_RETURN_IF_ERROR(Model::Load(input_onnx, model, nullptr, *logger));
std::filesystem::remove(output_onnx);
std::filesystem::remove(output_external_init_file);
ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold));
ORT_RETURN_IF_ERROR(Model::SaveWithExternalInitializers(*model, output_onnx, output_external_init_file, initializer_size_threshold,
align_info));

std::shared_ptr<Model> model_from_external;
ORT_RETURN_IF_ERROR(Model::Load(output_onnx.native(), model_from_external, nullptr, *logger));
Expand Down Expand Up @@ -75,6 +77,17 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,

ORT_RETURN_IF_NOT(tensor_proto_size == from_external_tensor_proto_size, "size mismatch");
ORT_RETURN_IF_NOT(memcmp(tensor_proto_data.data(), from_external_tensor_proto_data.data(), tensor_proto_size) == 0, "data mismatch");

// When offset alignment was requested, every "offset" entry recorded in the reloaded tensor's
// external_data must be a multiple of the configured allocation granularity.
if (align_info.align_offset) {
  for (const StringStringEntryProto& entry : from_external_tensor_proto->external_data()) {
    if (!entry.has_key() || !entry.has_value() || entry.key() != "offset") {
      continue;
    }

    // Parse into the same signed type as allocation_granularity to avoid a mixed
    // signed/unsigned modulo.
    int64_t tensor_offset = 0;
    std::stringstream stream(entry.value());
    stream >> tensor_offset;
    // A failed extraction leaves tensor_offset at 0, which would trivially pass the alignment
    // check below, so reject unparsable values explicitly.
    ORT_RETURN_IF_NOT(!stream.fail(), "tensor offset is not a valid number");
    ORT_RETURN_IF_NOT(tensor_offset % align_info.allocation_granularity == 0, "tensor offset not align");
  }
}
}
// Cleanup.
ORT_RETURN_IF_NOT(std::filesystem::remove(output_onnx), "delete file failed");
Expand All @@ -84,12 +97,22 @@ Status LoadSaveAndCompareModel(const std::filesystem::path& input_onnx,

// Original model does not have external initializers
TEST(SaveWithExternalInitializers, Mnist) {
ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/mnist.onnx"), ORT_TSTR(""), ORT_TSTR("testdata/mnist_with_external_initializers.onnx"), ORT_TSTR("mnist_external_initializers.bin"), 100));
Graph::OffsetAlignmentInfo align_info;
ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/mnist.onnx"), ORT_TSTR(""), ORT_TSTR("testdata/mnist_with_external_initializers.onnx"), ORT_TSTR("mnist_external_initializers.bin"), 100, align_info));
}

// Original model has external initializers
TEST(SaveWithExternalInitializers, ModelWithOriginalExternalData) {
ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0));
Graph::OffsetAlignmentInfo align_info;
ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0, align_info));
}

// Original model has external initializers, align offset
TEST(SaveWithExternalInitializers, ModelWithOriginalExternalDataAlignOffset) {
frank-dong-ms marked this conversation as resolved.
Show resolved Hide resolved
Graph::OffsetAlignmentInfo align_info;
align_info.align_offset = true;
align_info.align_threshold = 0;
ASSERT_STATUS_OK(LoadSaveAndCompareModel(ORT_TSTR("testdata/model_with_orig_ext_data.onnx"), ORT_TSTR("model_with_orig_ext_data.onnx.data"), ORT_TSTR("testdata/model_with_new_external_initializers.onnx"), ORT_TSTR("model_with_new_external_initializers.bin"), 0, align_info));
}

} // namespace test
Expand Down
Loading