Fix bug with saving model optimized by inference session (#16716)
### Description
A [previous PR](#16531) added a temporary directory in which to save the
optimized model after loading a model into an `InferenceSession`. Many models
with an external data file, however, require that file to be in the same
directory as the ONNX model file. Because the optimized model is saved in the
temporary directory while the external data is saved in a different directory,
loading the model from the temporary directory fails with a
`FileNotFoundError`.

This PR fixes the error by saving the external data file in the same
directory as the optimized model.
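
For illustration, here is a minimal sketch of the Python usage that exercises this code path (the file and directory names are hypothetical): the session writes the optimized model to a chosen directory and, with this fix, the external initializers file is placed next to it instead of in the working directory.

```python
import os

import onnxruntime as onnxrt

# Minimal sketch, assuming "model.onnx" is a model whose large initializers
# should be moved to an external data file when the optimized model is saved.
so = onnxrt.SessionOptions()
out_dir = "./optimized"
os.makedirs(out_dir, exist_ok=True)
so.optimized_model_filepath = os.path.join(out_dir, "model_optimized.onnx")
so.add_session_config_entry(
    "session.optimized_model_external_initializers_file_name", "model_optimized.onnx.data"
)
so.add_session_config_entry(
    "session.optimized_model_external_initializers_min_size_in_bytes", "1024"
)

# With this fix, model_optimized.onnx.data is written into out_dir next to the
# optimized model, so reloading it no longer raises FileNotFoundError.
onnxrt.InferenceSession("model.onnx", sess_options=so, providers=["CPUExecutionProvider"])
```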

### Motivation and Context
This PR fixes a bug that occurs when a temporary directory is used while
running the optimizer on models that have an external data file.
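
As a sketch of the optimizer-side usage, the `use_external_data_format` flag added to `optimize_model` in this PR forwards `save_as_external_data` to `optimize_by_onnxruntime`, so the intermediate model in the temporary directory keeps its external data beside it. The import path, model file, and the `save_model_to_file` call below are assumptions for illustration:

```python
from onnxruntime.transformers.optimizer import optimize_model

# Sketch, assuming "bert.onnx" stores large initializers in an external data
# file next to it. With use_external_data_format=True, the optimized model
# written to the temporary directory also keeps its external data file in that
# same directory, avoiding the FileNotFoundError this PR fixes.
opt_model = optimize_model(
    "bert.onnx",
    model_type="bert",
    opt_level=1,
    use_gpu=False,
    use_external_data_format=True,  # flag added in this PR
)
opt_model.save_model_to_file("bert_optimized.onnx", use_external_data_format=True)
```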
kunal-vaishnavi authored Jul 21, 2023
1 parent 0f9883f commit b7176f9
Showing 8 changed files with 79 additions and 13 deletions.
1 change: 1 addition & 0 deletions include/onnxruntime/core/graph/graph.h
@@ -1121,6 +1121,7 @@ class Graph {
@returns GraphProto serialization of the graph.
*/
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
const PathString& file_path,
size_t initializer_size_threshold) const;

/** Gets the ISchemaRegistry instances being used with this Graph. */
11 changes: 10 additions & 1 deletion onnxruntime/core/graph/graph.cc
@@ -3381,11 +3381,20 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {
}

ONNX_NAMESPACE::GraphProto Graph::ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
const PathString& destination_file_path,
size_t initializer_size_threshold) const {
GraphProto result;
ToGraphProtoInternal(result);

std::ofstream external_stream(external_file_name, std::ofstream::out | std::ofstream::binary);
Path parent_path = Path::Parse(destination_file_path).ParentPath();
Path external_file_path = Path::Parse(ToPathString(external_file_name));
// Check if parent_path is relative path (length = 0)
if (parent_path.ToPathString().length()) {
// Save external data file in same directory as model
external_file_path = parent_path.Append(external_file_path);
}

std::ofstream external_stream(external_file_path.ToPathString(), std::ofstream::out | std::ofstream::binary);
ORT_ENFORCE(external_stream.is_open());
int64_t external_offset = 0;

7 changes: 5 additions & 2 deletions onnxruntime/core/graph/model.cc
@@ -344,10 +344,12 @@ ModelProto Model::ToProto() {
}

ModelProto Model::ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
const PathString& file_path,
size_t initializer_size_threshold) {
ModelProto result(model_proto_);
const auto& graph = *graph_;
*(result.mutable_graph()) = graph.ToGraphProtoWithExternalInitializers(external_file_name,
file_path,
initializer_size_threshold);
return result;
}
@@ -572,7 +574,7 @@ static Status SaveModelWithExternalInitializers(Model& model,
ORT_RETURN_IF_ERROR(status);

ORT_TRY {
status = Model::SaveWithExternalInitializers(model, fd, external_file_name,
status = Model::SaveWithExternalInitializers(model, fd, file_path, external_file_name,
initializer_size_threshold);
}
ORT_CATCH(const std::exception& ex) {
@@ -722,6 +724,7 @@ Status Model::Save(Model& model, int p_fd) {

Status Model::SaveWithExternalInitializers(Model& model,
int fd,
const PathString& file_path,
const std::string& external_file_name,
size_t initializer_size_threshold) {
if (fd < 0) {
@@ -730,7 +733,7 @@ Status Model::SaveWithExternalInitializers(Model& model,

ORT_RETURN_IF_ERROR(model.MainGraph().Resolve());

auto model_proto = model.ToGraphProtoWithExternalInitializers(external_file_name, initializer_size_threshold);
auto model_proto = model.ToGraphProtoWithExternalInitializers(external_file_name, file_path, initializer_size_threshold);
google::protobuf::io::FileOutputStream output(fd);
const bool result = model_proto.SerializeToZeroCopyStream(&output) && output.Flush();
if (result) {
2 changes: 2 additions & 0 deletions onnxruntime/core/graph/model.h
@@ -191,6 +191,7 @@ class Model {
// Save initializer larger than the given threshold (in bytes) into an external binary file
// with the given name. This function is useful to avoid hitting the size limit of protobuf files.
ONNX_NAMESPACE::ModelProto ToGraphProtoWithExternalInitializers(const std::string& external_file_name,
const PathString& file_path,
size_t initializer_size_threshold);

#ifdef _WIN32
@@ -217,6 +218,7 @@

static common::Status SaveWithExternalInitializers(Model& model,
int fd,
const PathString& file_path,
const std::string& external_file_name,
size_t initializer_size_threshold);

4 changes: 2 additions & 2 deletions onnxruntime/python/tools/transformers/onnx_model.py
@@ -1021,13 +1021,13 @@ def save(
location = Path(external_data_path).name if all_tensors_to_one_file else None

if os.path.exists(output_path):
logger.info(f"Delete the existed onnx file: {output_path}")
logger.info(f"Delete the existing onnx file: {output_path}")
os.remove(output_path)

if all_tensors_to_one_file:
if os.path.exists(external_data_path):
# Delete the external data file. Otherwise, data will be appended to existing file.
logger.info(f"Delete the existed external data file: {external_data_path}")
logger.info(f"Delete the existing external data file: {external_data_path}")
os.remove(external_data_path)
else:
if os.listdir(output_dir):
35 changes: 29 additions & 6 deletions onnxruntime/python/tools/transformers/optimizer.py
@@ -21,7 +21,7 @@
import logging
import os
import tempfile
from typing import Dict, Optional
from typing import Dict, List, Optional

import coloredlogs
from fusion_options import FusionOptions
@@ -64,8 +64,11 @@ def optimize_by_onnxruntime(
use_gpu: bool = False,
optimized_model_path: Optional[str] = None,
opt_level: Optional[int] = 99,
disabled_optimizers=[], # noqa: B006
verbose=False,
disabled_optimizers: List[str] = [], # noqa: B006
verbose: bool = False,
save_as_external_data: bool = False,
external_data_filename: str = "",
external_data_file_threshold: int = 1024,
) -> str:
"""
Use onnxruntime to optimize model.
@@ -76,6 +79,9 @@
optimized_model_path (str or None): the path of optimized model.
opt_level (int): graph optimization level.
disabled_optimizers (List[str]): a list of names of disabled optimizers
save_as_external_data (bool): whether to save external data outside of ONNX model
external_data_filename (str): name of external data file. If not provided, name is automatically created from ONNX model.
external_data_file_threshold (int): threshold to decide whether to save tensor in ONNX model or in external data file
Returns:
optimized_model_path (str): the path of optimized model
"""
@@ -112,6 +118,16 @@
optimized_model_path = "{}_o{}_{}.onnx".format(path_prefix, opt_level, "gpu" if use_gpu else "cpu")

sess_options.optimized_model_filepath = optimized_model_path
if save_as_external_data:
if len(external_data_filename) == 0:
# Set external data filename to model_name.onnx.data
external_data_filename = os.path.basename(optimized_model_path) + ".data"
sess_options.add_session_config_entry(
"session.optimized_model_external_initializers_file_name", external_data_filename
)
sess_options.add_session_config_entry(
"session.optimized_model_external_initializers_min_size_in_bytes", str(external_data_file_threshold)
)

if verbose:
print("Using onnxruntime to optimize model - Debug level Set to verbose")
@@ -203,7 +219,8 @@ def optimize_model(
opt_level: Optional[int] = None,
use_gpu: bool = False,
only_onnxruntime: bool = False,
verbose=False,
verbose: bool = False,
use_external_data_format: bool = False,
):
"""Optimize Model by OnnxRuntime and/or python fusion logic.
@@ -241,6 +258,8 @@
use_gpu (bool, optional): use gpu or not for onnxruntime. Defaults to False.
only_onnxruntime (bool, optional): only use onnxruntime to optimize model, and no python fusion.
Defaults to False.
use_external_data_format (bool, optional): use external data format when saving optimized model.
Defaults to False.
Returns:
object of an optimizer class.
@@ -260,6 +279,7 @@
temp_dir = tempfile.TemporaryDirectory()
optimized_model_name = "model_o{}_{}.onnx".format(opt_level, "gpu" if use_gpu else "cpu")
optimized_model_path = os.path.join(temp_dir.name, optimized_model_name)

if opt_level > 1:
# Disable some optimizers that might cause failure in symbolic shape inference or attention fusion.
disabled_optimizers += (
@@ -276,10 +296,11 @@
temp_model_path = optimize_by_onnxruntime(
input,
use_gpu=use_gpu,
optimized_model_path=optimized_model_path,
opt_level=opt_level,
disabled_optimizers=disabled_optimizers,
verbose=verbose,
optimized_model_path=optimized_model_path,
save_as_external_data=use_external_data_format,
)
elif opt_level == 1:
# basic optimizations (like constant folding and cast elimination) are not specified to execution provider.
@@ -289,10 +310,11 @@
temp_model_path = optimize_by_onnxruntime(
input,
use_gpu=use_gpu,
optimized_model_path=optimized_model_path,
opt_level=1,
disabled_optimizers=disabled_optimizers,
verbose=verbose,
optimized_model_path=optimized_model_path,
save_as_external_data=use_external_data_format,
)

if only_onnxruntime and not temp_model_path:
@@ -474,6 +496,7 @@ def main():
optimization_options=optimization_options,
use_gpu=args.use_gpu,
only_onnxruntime=args.only_onnxruntime,
use_external_data_format=args.use_external_data_format,
)

if args.float16:
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/common/path_string.h"
#include "core/framework/data_types.h"
#include "core/graph/model.h"
#include "core/framework/tensorprotoutils.h"
@@ -41,6 +42,7 @@ void LoadSaveAndCompareModel(const std::string& input_onnx,
ASSERT_EQ(initializers.size(), initializers_from_external.size());

// Compare the initializers of the two versions.
Path external_data_path{};
for (auto i : initializers) {
const std::string kInitName = i.first;
const ONNX_NAMESPACE::TensorProto* tensor_proto = i.second;
@@ -51,7 +53,9 @@
size_t tensor_proto_size = tensor_proto_data.size();

std::vector<uint8_t> from_external_tensor_proto_data;
ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, Path(), from_external_tensor_proto_data));
Path model_path = Path::Parse(ToPathString(output_onnx));
external_data_path = model_path.ParentPath().Append(Path::Parse(ToPathString(external_init_file)));
ORT_THROW_IF_ERROR(utils::UnpackInitializerData(*from_external_tensor_proto, model_path, from_external_tensor_proto_data));
size_t from_external_tensor_proto_size = from_external_tensor_proto_data.size();

if (from_external_tensor_proto_size < initializer_size_threshold) {
@@ -67,7 +71,7 @@ void LoadSaveAndCompareModel(const std::string& input_onnx,
}
// Cleanup.
ASSERT_EQ(std::remove(output_onnx.c_str()), 0);
ASSERT_EQ(std::remove(external_init_file.c_str()), 0);
ASSERT_EQ(std::remove(PathToUTF8String(external_data_path.ToPathString()).c_str()), 0);
}

TEST(SaveWithExternalInitializers, Mnist) {
24 changes: 24 additions & 0 deletions onnxruntime/test/python/onnxruntime_test_python.py
@@ -121,6 +121,30 @@ def testModelSerializationWithExternalInitializers(self):  # noqa: N802
else:
raise onnxruntime_error

def testModelSerializationWithExternalInitializersToDirectory(self): # noqa: N802
try:
so = onnxrt.SessionOptions()
so.log_severity_level = 1
so.logid = "TestModelSerializationWithExternalInitializersToDirectory"
directory = "./testdata/"
so.optimized_model_filepath = os.path.join(directory, "model_with_external_initializers_in_dir.onnx")
external_initializers_file = "external_initializers_in_dir.bin"
so.add_session_config_entry(
"session.optimized_model_external_initializers_file_name", external_initializers_file
)
so.add_session_config_entry("session.optimized_model_external_initializers_min_size_in_bytes", "100")
onnxrt.InferenceSession(get_name("mnist.onnx"), sess_options=so, providers=["CPUExecutionProvider"])
self.assertTrue(os.path.isfile(so.optimized_model_filepath))
self.assertTrue(os.path.isfile(os.path.join(directory, external_initializers_file)))
except Fail as onnxruntime_error:
if (
str(onnxruntime_error) == "[ONNXRuntimeError] : 1 : FAIL : Unable to serialize model as it contains"
" compiled nodes. Please disable any execution providers which generate compiled nodes."
):
pass
else:
raise onnxruntime_error

def testGetProviders(self): # noqa: N802
self.assertTrue("CPUExecutionProvider" in onnxrt.get_available_providers())
# get_all_providers() returns the default EP order from highest to lowest.
