Fix bug with saving model optimized by inference session (#16716)
### Description
A [previous PR](#16531) added a temporary directory in which to save the
optimized model after loading a model into an `InferenceSession`. Many models
with an external data file, however, require that file to be in the same
directory as the ONNX model file. Because the optimized model is saved in the
temporary directory while the external data is saved in a different directory,
loading the model from the temporary directory fails with a
`FileNotFoundError`.

This PR fixes the error by saving the external data file in the same
directory as the optimized model.
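
For illustration, here is a minimal sketch of the Python usage that exercises this code path (the file and directory names are hypothetical): the session writes the optimized model to a chosen directory and, with this fix, the external initializers file is placed next to it instead of in the working directory.

```python
import os

import onnxruntime as onnxrt

# Minimal sketch, assuming "model.onnx" is a model whose large initializers
# should be moved to an external data file when the optimized model is saved.
so = onnxrt.SessionOptions()
out_dir = "./optimized"
os.makedirs(out_dir, exist_ok=True)
so.optimized_model_filepath = os.path.join(out_dir, "model_optimized.onnx")
so.add_session_config_entry(
    "session.optimized_model_external_initializers_file_name", "model_optimized.onnx.data"
)
so.add_session_config_entry(
    "session.optimized_model_external_initializers_min_size_in_bytes", "1024"
)

# With this fix, model_optimized.onnx.data is written into out_dir next to the
# optimized model, so reloading it no longer raises FileNotFoundError.
onnxrt.InferenceSession("model.onnx", sess_options=so, providers=["CPUExecutionProvider"])
```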

### Motivation and Context
This PR fixes a bug that occurs when a temporary directory is used while
running the optimizer on models that have an external data file.
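
As a sketch of the optimizer-side usage, the `use_external_data_format` flag added to `optimize_model` in this PR forwards `save_as_external_data` to `optimize_by_onnxruntime`, so the intermediate model in the temporary directory keeps its external data beside it. The import path, model file, and the `save_model_to_file` call below are assumptions for illustration:

```python
from onnxruntime.transformers.optimizer import optimize_model

# Sketch, assuming "bert.onnx" stores large initializers in an external data
# file next to it. With use_external_data_format=True, the optimized model
# written to the temporary directory also keeps its external data file in that
# same directory, avoiding the FileNotFoundError this PR fixes.
opt_model = optimize_model(
    "bert.onnx",
    model_type="bert",
    opt_level=1,
    use_gpu=False,
    use_external_data_format=True,  # flag added in this PR
)
opt_model.save_model_to_file("bert_optimized.onnx", use_external_data_format=True)
```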
kunal-vaishnavi authored Jul 21, 2023
1 parent 0f9883f commit b7176f9
Showing 8 changed files with 79 additions and 13 deletions.
1 change: 1 addition & 0 deletions include/onnxruntime/core/graph/graph.h
@@ -1121,6 +1121,7 @@ class Graph {
@returns GraphProto serialization of the graph.
*/
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
const PathString& file_path,
size_t initializer_size_threshold) const;

/** Gets the ISchemaRegistry instances being used with this Graph. */
11 changes: 10 additions & 1 deletion onnxruntime/core/graph/graph.cc
@@ -3381,11 +3381,20 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {
}

ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
const PathString& destination_file_path,
size_t initializer_size_threshold) const {
GraphProto result;
ToGraphProtoInternal(result);

std::ofstream external_stream(external_file_name, std::ofstream::out | std::ofstream::binary);
Path parent_path = Path::Parse(destination_file_path).ParentPath();
Path external_file_path = Path::Parse(ToPathString(external_file_name));
// Check if parent_path is relative path (length = 0)
if (parent_path.ToPathString().length()) {
// Save external data file in same directory as model
external_file_path = parent_path.Append(external_file_path);
}

std::ofstream external_stream(external_file_path.ToPathString(), std::ofstream::out | std::ofstream::binary);
ORT_ENFORCE(external_stream.is_open());
int64_t external_offset = 0;

7 changes: 5 additions & 2 deletions onnxruntime/core/graph/model.cc
@@ -344,10 +344,12 @@ ModelProto Model::ToProto() {
}

ModelProto Model::ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
const PathString& file_path,
size_t initializer_size_threshold) {
ModelProto result(model_proto_);
const auto& graph = *graph_;
*(result.mutable_graph()) = graph.ToGraphProtoWithExternalInitializers(external_file_name,
file_path,
initializer_size_threshold);
return result;
}
@@ -572,7 +574,7 @@ static Status SaveModelWithExternalInitializers(Model& model,
ORT_RETURN_IF_ERROR(status);

ORT_TRY {
status = Model::SaveWithExternalInitializers(model, fd, external_file_name,
status = Model::SaveWithExternalInitializers(model, fd, file_path, external_file_name,
initializer_size_threshold);
}
ORT_CATCH(const std::exception& ex) {
@@ -722,6 +724,7 @@ Status Model::Save(Model& model, int p_fd) {

Status Model::SaveWithExternalInitializers(Model& model,
int fd,
const PathString& file_path,
const std::string& external_file_name,
size_t initializer_size_threshold) {
if (fd < 0) {
@@ -730,7 +733,7 @@ Status Model::SaveWithExternalInitializers(Model& model,

ORT_RETURN_IF_ERROR(model.MainGraph().Resolve());

auto model_proto = model.ToGraphProtoWithExternalInitializers(external_file_name, initializer_size_threshold);
auto model_proto = model.ToGraphProtoWithExternalInitializers(external_file_name, file_path, initializer_size_threshold);
google::protobuf::io::FileOutputStream output(fd);
const bool result = model_proto.SerializeToZeroCopyStream(&output) && output.Flush();
if (result) {
2 changes: 2 additions & 0 deletions onnxruntime/core/graph/model.h
@@ -191,6 +191,7 @@ class Model {
// Save initializer larger than the given threshold (in bytes) into an external binary file
// with the given name. This function is useful to avoid hitting the size limit of protobuf files.
ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
const PathString& file_path,
size_t initializer_size_threshold);

#ifdef _WIN32
@@ -217,6 +218,7 @@

static common::Status SaveWithExternalInitializers(Model& model,
int fd,
const PathString& file_path,
const std::string& external_file_name,
size_t initializer_size_threshold);

4 changes: 2 additions & 2 deletions onnxruntime/python/tools/transformers/onnx_model.py
@@ -1021,13 +1021,13 @@ def save(
location = Path(external_data_path).name if all_tensors_to_one_file else None

if os.path.exists(output_path):
logger.info(f"Delete the existed onnx file: {output_path}")
logger.info(f"Delete the existing onnx file: {output_path}")
os.remove(output_path)

if all_tensors_to_one_file:
if os.path.exists(external_data_path):
# Delete the external data file. Otherwise, data will be appended to existing file.
logger.info(f"Delete the existed external data file: {external_data_path}")
logger.info(f"Delete the existing external data file: {external_data_path}")
os.remove(external_data_path)
else:
if os.listdir(output_dir):
35 changes: 29 additions & 6 deletions onnxruntime/python/tools/transformers/optimizer.py
@@ -21,7 +21,7 @@
import logging
import os
import tempfile
from typing import Dict, Optional
from typing import Dict, List, Optional

import coloredlogs
from fusion_options import FusionOptions
@@ -64,8 +64,11 @@ def optimize_by_onnxruntime(
use_gpu: bool = False,
optimized_model_path: Optional[str] = None,
opt_level: Optional[int] = 99,
disabled_optimizers=[], # noqa: B006
verbose=False,
disabled_optimizers: List[str] = [], # noqa: B006
verbose: bool = False,
save_as_external_data: bool = False,
external_data_filename: str = "",
external_data_file_threshold: int = 1024,
) -> str:
"""
Use onnxruntime to optimize model.
@@ -76,6 +79,9 @@
optimized_model_path (str or None): the path of optimized model.
opt_level (int): graph optimization level.
disabled_optimizers (List[str]): a list of names of disabled optimizers
save_as_external_data (bool): whether to save external data outside of ONNX model
external_data_filename (str): name of external data file. If not provided, name is automatically created from ONNX model.
external_data_file_threshold (int): threshold to decide whether to save tensor in ONNX model or in external data file
Returns:
optimized_model_path (str): the path of optimized model
"""
@@ -112,6 +118,16 @@
optimized_model_path = "{}_o{}_{}.onnx".format(path_prefix, opt_level, "gpu" if use_gpu else "cpu")

sess_options.optimized_model_filepath = optimized_model_path
if save_as_external_data:
if len(external_data_filename) == 0:
# Set external data filename to model_name.onnx.data
external_data_filename = os.path.basename(optimized_model_path) + ".data"
sess_options.add_session_config_entry(
"session.optimized_model_external_initializers_file_name", external_data_filename
)
sess_options.add_session_config_entry(
"session.optimized_model_external_initializers_min_size_in_bytes", str(external_data_file_threshold)
)

if verbose:
print("Using onnxruntime to optimize model - Debug level Set to verbose")
@@ -203,7 +219,8 @@ def optimize_model(
opt_level: Optional[int] = None,
use_gpu: bool = False,
only_onnxruntime: bool = False,
verbose=False,
verbose: bool = False,
use_external_data_format: bool = False,
):
"""Optimize Model by OnnxRuntime and/or python fusion logic.
@@ -241,6 +258,8 @@
use_gpu (bool, optional): use gpu or not for onnxruntime. Defaults to False.
only_onnxruntime (bool, optional): only use onnxruntime to optimize model, and no python fusion.
Defaults to False.
use_external_data_format (bool, optional): use external data format when saving optimized model.
Defaults to False.
Returns:
object of an optimizer class.
@@ -260,6 +279,7 @@
temp_dir = tempfile.TemporaryDirectory()
optimized_model_name = "model_o{}_{}.onnx".format(opt_level, "gpu" if use_gpu else "cpu")
optimized_model_path = os.path.join(temp_dir.name, optimized_model_name)

if opt_level > 1:
# Disable some optimizers that might cause failure in symbolic shape inference or attention fusion.
disabled_optimizers += (
@@ -276,10 +296,11 @@
temp_model_path = optimize_by_onnxruntime(
input,
use_gpu=use_gpu,
optimized_model_path=optimized_model_path,
opt_level=opt_level,
disabled_optimizers=disabled_optimizers,
verbose=verbose,
optimized_model_path=optimized_model_path,
save_as_external_data=use_external_data_format,
)
elif opt_level == 1:
# basic optimizations (like constant folding and cast elimination) are not specified to execution provider.
@@ -289,10 +310,11 @@
temp_model_path = optimize_by_onnxruntime(
input,
use_gpu=use_gpu,
optimized_model_path=optimized_model_path,
opt_level=1,
disabled_optimizers=disabled_optimizers,
verbose=verbose,
optimized_model_path=optimized_model_path,
save_as_external_data=use_external_data_format,
)

if only_onnxruntime and not temp_model_path:
@@ -474,6 +496,7 @@ def main():
optimization_options=optimization_options,
use_gpu=args.use_gpu,
only_onnxruntime=args.only_onnxruntime,
use_external_data_format=args.use_external_data_format,
)

if args.float16:
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/common/path_string.h"
#include "core/framework/data_types.h"
#include "core/graph/model.h"
#include "core/framework/tensorprotoutils.h"
@@ -41,6 +42,7 @@ void LoadSaveAndCompareModel(const std::string& input_onnx,
ASSERT_EQ(initializers.size(), initializers_from_external.size());

// Compare the initializers of the two versions.
Path external_data_path{};
for (auto i : initializers) {
const std::string kInitName = i.first;
const ONNX_NAMESPACE::TensorProto* tensor_proto = i.second;
@@ -51,7 +53,9 @@
size_t tensor_proto_size = tensor_proto_data.size();

std::vector<uint8_t> from_external_tensor_proto_data;
ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, Path(), from_external_tensor_proto_data));
Path model_path = Path::Parse(ToPathString(output_onnx));
external_data_path = model_path.ParentPath().Append(Path::Parse(ToPathString(external_init_file)));
ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, model_path, from_external_tensor_proto_data));
size_t from_external_tensor_proto_size = from_external_tensor_proto_data.size();

if (from_external_tensor_proto_size < initializer_size_threshold) {
@@ -67,7 +71,7 @@ void LoadSaveAndCompareModel(const std::string& input_onnx,
}
// Cleanup.
ASSERT_EQ(std::remove(output_onnx.c_str()), 0);
ASSERT_EQ(std::remove(external_init_file.c_str()), 0);
ASSERT_EQ(std::remove(PathToUTF8String(external_data_path.ToPathString()).c_str()), 0);
}

TEST(SaveWithExternalInitializers, Mnist) {
24 changes: 24 additions & 0 deletions onnxruntime/test/python/onnxruntime_test_python.py
@@ -121,6 +121,30 @@ def testModelSerializationWithExternalInitializers(self):  # noqa: N802
else:
raise onnxruntime_error

def testModelSerializationWithExternalInitializersToDirectory(self): # noqa: N802
try:
so = onnxrt.SessionOptions()
so.log_severity_level = 1
so.logid = "TestModelSerializationWithExternalInitializersToDirectory"
directory = "./testdata/"
so.optimized_model_filepath = os.path.join(directory, "model_with_external_initializers_in_dir.onnx")
external_initializers_file = "external_initializers_in_dir.bin"
so.add_session_config_entry(
"session.optimized_model_external_initializers_file_name", external_initializers_file
)
so.add_session_config_entry("session.optimized_model_external_initializers_min_size_in_bytes", "100")
onnxrt.InferenceSession(get_name("mnist.onnx"), sess_options=so, providers=["CPUExecutionProvider"])
self.assertTrue(os.path.isfile(so.optimized_model_filepath))
self.assertTrue(os.path.isfile(os.path.join(directory, external_initializers_file)))
except Fail as onnxruntime_error:
if (
str(onnxruntime_error) == "[ONNXRuntimeError] : 1 : FAIL : Unable to serialize model as it contains"
" compiled nodes. Please disable any execution providers which generate compiled nodes."
):
pass
else:
raise onnxruntime_error

def testGetProviders(self): # noqa: N802
self.assertTrue("CPUExecutionProvider" in onnxrt.get_available_providers())
# get_all_providers() returns the default EP order from highest to lowest.
