Skip to content

Commit

Permalink
[NNAPI EP] Track skipped initializer usage (microsoft#21286)
Browse files Browse the repository at this point in the history
Track skipped initializer usage in NNAPI EP to account for usage by other nodes.
  • Loading branch information
edgchen1 authored Jul 9, 2024
1 parent 1ab162f commit 307b34a
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,13 @@ DEFINE_ADD_OPERAND_FROM_SCALAR(float, FLOAT32);
#undef DEFINE_ADD_OPERAND_FROM_SCALAR

// Marks an initializer as "skipped" for NNAPI conversion purposes by decrementing its usage count.
// An initializer whose usage count drops to zero is excluded from the NNAPI model; initializers
// still referenced by other nodes keep a positive count and are registered as usual.
void ModelBuilder::AddInitializerToSkip(const std::string& tensor_name) {
  // Decrement the usage count if this is a known initializer.
  // For simplicity, the OpBuilder::AddInitializersToSkip implementations may call this for arbitrary
  // input names without first checking if the value is an initializer; unknown names are ignored.
  auto entry = initializer_usage_.find(tensor_name);
  if (entry != initializer_usage_.end()) {
    entry->second -= 1;
  }
}

Status ModelBuilder::Prepare() {
Expand Down Expand Up @@ -87,7 +93,16 @@ static size_t GetPaddedByteSize(size_t size) {
}

void ModelBuilder::PreprocessInitializers() {
const auto& initializers = GetInitializerTensors();

for (const auto& node_unit : node_unit_holder_) {
// find all initializers consumed. AddInitializersToSkip will potentially decrement the usage count.
for (const auto& input : node_unit->Inputs()) {
if (input.node_arg.Exists() && Contains(initializers, input.node_arg.Name())) {
initializer_usage_[input.node_arg.Name()]++;
}
}

if (const auto* op_builder = GetOpBuilder(*node_unit)) {
op_builder->AddInitializersToSkip(*this, *node_unit);
}
Expand Down Expand Up @@ -208,11 +223,16 @@ Status ModelBuilder::RegisterInitializers() {
std::vector<std::tuple<uint32_t, size_t, size_t>> initializers(initializer_size);
size_t sizeAll = 0;

const auto should_skip_initializer = [this](const std::string& name) -> bool {
const auto it = initializer_usage_.find(name);
return it == initializer_usage_.end() || it->second == 0;
};

int i = 0;
for (const auto& pair : initializer_tensors) {
const auto& tensor = *pair.second;
const auto& name = tensor.name();
if (Contains(skipped_initializers_, name))
if (should_skip_initializer(name))
continue;

Shape shape;
Expand Down Expand Up @@ -249,7 +269,7 @@ Status ModelBuilder::RegisterInitializers() {
size_t offset = 0;
for (const auto& pair : initializer_tensors) {
const auto& tensor = *pair.second;
if (Contains(skipped_initializers_, tensor.name()))
if (should_skip_initializer(tensor.name()))
continue;

auto [index, size, padded_size] = initializers[i++];
Expand Down Expand Up @@ -439,10 +459,11 @@ Status ModelBuilder::AddOperandFromPersistMemoryBuffer(
Status ModelBuilder::AddOperations() {
const auto& node_indices = graph_viewer_.GetNodesInTopologicalOrder();
for (const auto node_idx : node_indices) {
LOGS_DEFAULT(VERBOSE) << "Adding node [" << node_idx << "]";
const auto* node(graph_viewer_.GetNode(node_idx));
const NodeUnit& node_unit = GetNodeUnit(node);

LOGS_DEFAULT(VERBOSE) << "Adding node [" << node_unit.Name() << "] at index [" << node_unit.Index() << "]";

// Since a NodeUnit may contain multiple nodes, inserting a NodeUnit at the first occurrence of
// its node(s) in topological order may produce an incorrect topological order of NodeUnits,
// for example,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ class ModelBuilder {
std::unordered_set<std::string> operands_;
std::unordered_set<std::string> fused_activations_;

std::unordered_set<std::string> skipped_initializers_;
std::unordered_map<std::string, int> initializer_usage_;

// All activation nodes (Relu, Relu1, Relu6) as a map <const NodeUnit*, activation_code>
std::unordered_map<const NodeUnit*, int32_t> activation_node_units_;
Expand Down
51 changes: 40 additions & 11 deletions onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "test/common/tensor_op_test_utils.h"
#include "test/framework/test_utils.h"
#include "test/util/include/asserts.h"
#include "test/util/include/current_test_name.h"
#include "test/util/include/default_providers.h"
#include "test/util/include/inference_session_wrapper.h"
#include "test/util/include/test/test_environment.h"
Expand All @@ -36,10 +37,6 @@ using namespace ::onnxruntime::logging;
namespace onnxruntime {
namespace test {

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

#endif // !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)

#if !defined(ORT_MINIMAL_BUILD)

// Since NNAPI EP handles Reshape and Flatten differently,
Expand All @@ -65,7 +62,8 @@ TEST(NnapiExecutionProviderTest, ReshapeFlattenTest) {
feeds.insert(std::make_pair("X", ml_value_x));
feeds.insert(std::make_pair("Y", ml_value_y));

RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.ReshapeFlattenTest",
RunAndVerifyOutputsWithEP(model_file_name,
CurrentTestName(),
std::make_unique<NnapiExecutionProvider>(0),
feeds);
#else
Expand All @@ -88,7 +86,8 @@ TEST(NnapiExecutionProviderTest, SigmoidSupportedInputRankTest) {
NameMLValMap feeds;
feeds.insert(std::make_pair("X", ml_value_x));

RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.SigmoidSupportedInputRankTest",
RunAndVerifyOutputsWithEP(model_file_name,
CurrentTestName(),
std::make_unique<NnapiExecutionProvider>(0),
feeds, {ExpectedEPNodeAssignment::None} /* params */);
#else
Expand All @@ -115,7 +114,8 @@ TEST(NnapiExecutionProviderTest, DynamicGraphInputTest) {
NameMLValMap feeds;
feeds.insert(std::make_pair("X", ml_value_x));

RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.DynamicGraphInputTest",
RunAndVerifyOutputsWithEP(model_file_name,
CurrentTestName(),
std::make_unique<NnapiExecutionProvider>(0),
feeds);
#else
Expand Down Expand Up @@ -144,7 +144,8 @@ TEST(NnapiExecutionProviderTest, InternalUint8SupportTest) {
NameMLValMap feeds;
feeds.insert(std::make_pair("X", ml_value_x));

RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.InternalUint8SupportTest",
RunAndVerifyOutputsWithEP(model_file_name,
CurrentTestName(),
std::make_unique<NnapiExecutionProvider>(0),
feeds);
#else
Expand Down Expand Up @@ -208,7 +209,8 @@ TEST(NnapiExecutionProviderTest, FunctionTest) {
feeds.insert(std::make_pair("Y", ml_value_y));
feeds.insert(std::make_pair("Z", ml_value_z));

RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.FunctionTest",
RunAndVerifyOutputsWithEP(model_file_name,
CurrentTestName(),
std::make_unique<NnapiExecutionProvider>(0),
feeds);
#else
Expand Down Expand Up @@ -273,7 +275,8 @@ static void RunQDQModelTest(
const auto model_data_span = AsByteSpan(model_data.data(), model_data.size());

#if defined(__ANDROID__)
RunAndVerifyOutputsWithEP(model_data_span, "NnapiExecutionProviderTest.TestQDQModel",
RunAndVerifyOutputsWithEP(model_data_span,
CurrentTestName(),
std::make_unique<NnapiExecutionProvider>(0),
helper.feeds_, params);
#else
Expand Down Expand Up @@ -513,6 +516,31 @@ TEST(NnapiExecutionProviderTest, TestGather) {
{ExpectedEPNodeAssignment::All});
}

TEST(NnapiExecutionProviderTest, SharedInitializersDoNotGetSkipped) {
  // NNAPI EP's Clip op builder will mark the max initializer as skipped but it is also used by the Div op.
  // Test that the shared initializer is still present in the NNAPI model for the Div op.
  constexpr auto* model_file_name = ORT_TSTR("testdata/clip_div_shared_initializer.onnx");

#if defined(__ANDROID__)
  AllocatorPtr cpu_allocator = std::make_shared<CPUAllocator>();

  // The model input "input_0" has shape [3, 2], so the feed needs 3 * 2 = 6 elements.
  // Note: std::vector's fill constructor takes (count, value) in that order.
  std::vector<int64_t> x_dims{3, 2};
  std::vector<float> x_values(3 * 2, 3.0f);
  OrtValue ml_value_x;
  CreateMLValue<float>(cpu_allocator, x_dims, x_values, &ml_value_x);

  NameMLValMap feeds{{"input_0", ml_value_x}};

  RunAndVerifyOutputsWithEP(model_file_name,
                            CurrentTestName(),
                            std::make_unique<NnapiExecutionProvider>(0),
                            feeds,
                            {ExpectedEPNodeAssignment::All});
#else
  // On non-Android platforms NNAPI cannot execute, so only verify that the model loads and
  // all nodes are assigned to the NNAPI EP.
  TestModelLoad(model_file_name, std::make_unique<NnapiExecutionProvider>(0), ExpectedEPNodeAssignment::All);
#endif
}

#endif // !(ORT_MINIMAL_BUILD)

TEST(NnapiExecutionProviderTest, NNAPIFlagsTest) {
Expand Down Expand Up @@ -541,7 +569,8 @@ TEST(NnapiExecutionProviderTest, TestOrtFormatModel) {
NameMLValMap feeds;
feeds.insert(std::make_pair("Input3", ml_value));

RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.TestOrtFormatModel",
RunAndVerifyOutputsWithEP(model_file_name,
CurrentTestName(),
std::make_unique<NnapiExecutionProvider>(0),
feeds);
#else
Expand Down
Binary file not shown.
33 changes: 33 additions & 0 deletions onnxruntime/test/testdata/clip_div_shared_initializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from onnx import TensorProto, checker, helper, save

# Build a small model in which "initializer_1" is shared between two nodes:
# Clip consumes it as the max input and Div consumes it as the divisor.
clip_node = helper.make_node(
    "Clip",
    inputs=["input_0", "initializer_0", "initializer_1"],
    outputs=["clip_output"],
    name="clip",
)
div_node = helper.make_node(
    "Div",
    inputs=["clip_output", "initializer_1"],
    outputs=["output_0"],
    name="div",
)

graph_inputs = [helper.make_tensor_value_info("input_0", TensorProto.FLOAT, [3, 2])]
graph_outputs = [helper.make_tensor_value_info("output_0", TensorProto.FLOAT, [3, 2])]
graph_initializers = [
    helper.make_tensor("initializer_0", TensorProto.FLOAT, [], [0.0]),
    helper.make_tensor("initializer_1", TensorProto.FLOAT, [], [6.0]),
]

graph_proto = helper.make_graph(
    [clip_node, div_node],
    "Main_graph",
    graph_inputs,
    graph_outputs,
    graph_initializers,
)

# Validate the model (full_check=True also runs shape inference) before saving.
model = helper.make_model(graph_proto)
checker.check_model(model, True)
save(model, "clip_div_shared_initializer.onnx")

0 comments on commit 307b34a

Please sign in to comment.