Rollback 19832, Remove shape_input_merge Fusion (#21179)
The original PR (#19832) caused the Big Models pipeline to fail when running Llama2. After the rollback, the pipeline is back to normal.
centwang authored Jun 26, 2024
1 parent 337cc56 commit 3c0b407
Showing 5 changed files with 6 additions and 186 deletions.
8 changes: 3 additions & 5 deletions onnxruntime/core/optimizer/graph_transformer_utils.cc
@@ -71,7 +71,6 @@
#include "core/optimizer/reshape_fusion.h"
#include "core/optimizer/rocm_blas_alt_impl.h"
#include "core/optimizer/rule_based_graph_transformer.h"
#include "core/optimizer/shape_input_merge.h"
#include "core/optimizer/skip_layer_norm_fusion.h"
#include "core/optimizer/slice_elimination.h"
#include "core/optimizer/transpose_optimizer.h"
@@ -215,17 +214,16 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
   transformers.emplace_back(std::make_unique<DoubleQDQPairsRemover>());
 }

-  // Put ConstantSharing and ShapeInputMerge before CommonSubexpressionElimination by intention as it can create
-  // more opportunities for CSE. For example, if A and B nodes consume same different args but produce same output
-  // or consume different initializers with same value, by default, CSE will not merge them.
+  // Put ConstantSharing before CommonSubexpressionElimination by intention as it can create more opportunities for
+  // CSE. For example, if A and B nodes consume different initializers with same value, by default,
+  // CSE will not merge them.
   InlinedHashSet<std::string> excluded_initializers;
   excluded_initializers.reserve(session_options.initializers_to_share_map.size());
   for (const auto& p : session_options.initializers_to_share_map) {
     excluded_initializers.insert(p.first);
   }
   const InlinedHashSet<std::string_view> no_limit_empty_ep_list = {};
   transformers.emplace_back(std::make_unique<ConstantSharing>(no_limit_empty_ep_list, excluded_initializers));
-  transformers.emplace_back(std::make_unique<ShapeInputMerge>());
   transformers.emplace_back(std::make_unique<CommonSubexpressionElimination>());
   transformers.emplace_back(std::make_unique<ConstantFolding>(cpu_execution_provider, !disable_quant_qdq,
                                                               session_options.config_options));
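The rationale in the comment above is worth unpacking: CommonSubexpressionElimination merges two nodes only when their op types and input lists match exactly, so deduplicating equal-valued initializers first exposes merges CSE would otherwise miss. Below is a minimal, self-contained sketch of that interplay; the types and names are hypothetical stand-ins, not the ONNX Runtime API.

```cpp
// Why ConstantSharing before CSE pays off: two Add nodes consume different
// initializers holding the same value. CSE keys on (op type, exact inputs),
// so it can only merge them after the initializers are shared.
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct Node {
  std::string op;
  std::vector<std::string> inputs;
};

int main() {
  // Hypothetical graph: c0 and c1 are distinct initializers with equal values.
  std::map<std::string, int> initializers = {{"c0", 7}, {"c1", 7}};
  std::vector<Node> nodes = {{"Add", {"x", "c0"}}, {"Add", {"x", "c1"}}};

  // ConstantSharing: point every use of an equal-valued initializer at one
  // representative (the first one seen).
  std::map<int, std::string> value_to_rep;
  for (const auto& [name, value] : initializers) {
    auto [it, inserted] = value_to_rep.emplace(value, name);
    if (!inserted) {
      for (auto& node : nodes)
        for (auto& input : node.inputs)
          if (input == name) input = it->second;
    }
  }

  // CSE: a node is redundant only if an identical (op, inputs) pair was seen.
  std::map<std::pair<std::string, std::vector<std::string>>, bool> seen;
  int merged = 0;
  for (const auto& node : nodes)
    if (!seen.emplace(std::make_pair(node.op, node.inputs), true).second) ++merged;

  std::cout << "nodes merged by CSE: " << merged << "\n";  // 1 with sharing, 0 without
}
```

Run the sharing loop and both Add nodes collapse onto the same input list, so CSE folds them into one; skip it and their input lists stay distinct, which is exactly the missed opportunity the comment describes.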
78 changes: 0 additions & 78 deletions onnxruntime/core/optimizer/shape_input_merge.cc

This file was deleted.

23 changes: 0 additions & 23 deletions onnxruntime/core/optimizer/shape_input_merge.h

This file was deleted.

75 changes: 0 additions & 75 deletions onnxruntime/test/optimizer/graph_transform_test.cc
@@ -70,7 +70,6 @@
#include "core/optimizer/relu_clip_fusion.h"
#include "core/optimizer/reshape_fusion.h"
#include "core/optimizer/rule_based_graph_transformer.h"
#include "core/optimizer/shape_input_merge.h"
#include "core/optimizer/slice_elimination.h"
#include "core/optimizer/unsqueeze_elimination.h"
#include "core/optimizer/utils.h"
@@ -7691,80 +7690,6 @@ TEST_F(GraphTransformationTests, GatherToSliceFusion) {
 }
 }

-TEST_F(GraphTransformationTests, ShapeInputMerge) {
-  auto build_test_case = [&](ModelTestBuilder& builder) {
-    std::vector<std::variant<int64_t, std::string>> input_shape;
-    input_shape.reserve(5);
-    input_shape.emplace_back("dim0");
-    input_shape.emplace_back(512);
-    input_shape.emplace_back(1);
-    input_shape.emplace_back(1536);
-    input_shape.emplace_back("dim4");
-    auto* input_arg = builder.MakeSymbolicInput<float>(input_shape);
-    auto* neg_out = builder.MakeIntermediate();
-    auto* axes_initializer = builder.MakeInitializer<int64_t>({1}, {static_cast<int64_t>(2)});
-    auto* squeeze_out = builder.MakeIntermediate();
-    auto* cast_out = builder.MakeIntermediate();
-    auto* unsqueeze_out = builder.MakeOutput();
-    auto* shape_1_out = builder.MakeOutput();
-    auto* shape_2_out = builder.MakeOutput();
-    auto* shape_3_out = builder.MakeOutput();
-    auto* shape_4_out = builder.MakeOutput();
-    auto* shape_5_out = builder.MakeOutput();
-    builder.AddNode("Neg", {input_arg}, {neg_out});
-    builder.AddNode("Squeeze", {neg_out, axes_initializer}, {squeeze_out});
-    builder.AddNode("Cast", {squeeze_out}, {cast_out}).AddAttribute("to", static_cast<int64_t>(10));
-    builder.AddNode("Unsqueeze", {cast_out, axes_initializer}, {unsqueeze_out});
-    builder.AddNode("Shape", {input_arg}, {shape_1_out});
-    builder.AddNode("Shape", {neg_out}, {shape_2_out});
-    builder.AddNode("Shape", {squeeze_out}, {shape_3_out});
-    builder.AddNode("Shape", {cast_out}, {shape_4_out});
-    builder.AddNode("Shape", {unsqueeze_out}, {shape_5_out});
-  };
-
-  auto pre_graph_checker = [&](Graph& graph) {
-    InlinedHashMap<std::string, int> ref_count;
-    for (auto& node : graph.Nodes()) {
-      if (node.OpType() == "Shape") {
-        std::string name = node.InputDefs()[0]->Name();
-        if (ref_count.find(name) == ref_count.end()) {
-          ref_count[name] = 1;
-        } else {
-          ref_count[name]++;
-        }
-      }
-    }
-    TEST_RETURN_IF_NOT(ref_count.size() == 5);
-    return Status::OK();
-  };
-
-  auto post_graph_checker = [&](Graph& graph) {
-    InlinedHashMap<std::string, int> ref_count;
-    for (auto& node : graph.Nodes()) {
-      if (node.OpType() == "Shape") {
-        std::string name = node.InputDefs()[0]->Name();
-        if (ref_count.find(name) == ref_count.end()) {
-          ref_count[name] = 1;
-        } else {
-          ref_count[name]++;
-        }
-      }
-    }
-    TEST_RETURN_IF_NOT(ref_count.size() == 2);
-    int sum = 0, mul = 1;
-    for (auto& entry : ref_count) {
-      sum += entry.second;
-      mul *= entry.second;
-    }
-    TEST_RETURN_IF_NOT(sum == 5 && mul == 6);
-    return Status::OK();
-  };
-
-  std::unique_ptr<GraphTransformer> transformer = std::make_unique<ShapeInputMerge>();
-  ASSERT_STATUS_OK(TestGraphTransformer(build_test_case, 14, *logger_, std::move(transformer), TransformerLevel::Level1,
-                                        1, pre_graph_checker, post_graph_checker));
-}
-
 #if !defined(DISABLE_CONTRIB_OPS)

 TEST_F(GraphTransformationTests, MatMulNBitsBiasFusion) {
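For context on what was rolled back: the deleted test built five Shape nodes whose inputs spanned only two distinct symbolic shapes — [dim0,512,1,1536,dim4] for input/neg/unsqueeze and [dim0,512,1536,dim4] for squeeze/cast — and checked that ShapeInputMerge rewired them onto two representative inputs. The post-checker's `sum == 5 && mul == 6` pins the per-input reference counts to exactly {3, 2}, the only pair of positive integers with sum 5 and product 6. A condensed sketch of the merge idea follows, with hypothetical types rather than the deleted 78-line implementation:

```cpp
// ShapeInputMerge in miniature: Shape nodes whose inputs have identical
// (symbolic) shapes are redirected to one representative input, which in
// turn lets CSE fold the now-identical Shape nodes themselves.
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct ShapeNode {
  std::string input;        // name of the tensor feeding the Shape op
  std::string input_shape;  // serialized symbolic shape of that tensor
};

int main() {
  // Mirrors the deleted test: input/neg/unsqueeze share one shape,
  // squeeze/cast share another.
  std::vector<ShapeNode> shape_nodes = {
      {"input", "[dim0,512,1,1536,dim4]"},
      {"neg", "[dim0,512,1,1536,dim4]"},
      {"squeeze", "[dim0,512,1536,dim4]"},
      {"cast", "[dim0,512,1536,dim4]"},
      {"unsqueeze", "[dim0,512,1,1536,dim4]"}};

  // Group by shape string; the first input seen becomes the representative.
  std::map<std::string, std::string> representative;
  for (auto& node : shape_nodes) {
    auto [it, inserted] = representative.emplace(node.input_shape, node.input);
    if (!inserted) node.input = it->second;
  }

  // Count references per surviving input: expect {input: 3, squeeze: 2},
  // matching the deleted post_graph_checker (size 2, sum 5, product 6).
  std::map<std::string, int> ref_count;
  for (const auto& node : shape_nodes) ++ref_count[node.input];
  for (const auto& [name, count] : ref_count)
    std::cout << name << ": " << count << "\n";
}
```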
8 changes: 3 additions & 5 deletions orttraining/orttraining/core/optimizer/graph_transformer_utils.cc

@@ -44,7 +44,6 @@
#include "core/optimizer/relu_clip_fusion.h"
#include "core/optimizer/reshape_fusion.h"
#include "core/optimizer/rule_based_graph_transformer.h"
#include "core/optimizer/shape_input_merge.h"
#include "core/optimizer/skip_layer_norm_fusion.h"
#include "core/optimizer/slice_elimination.h"
#include "core/optimizer/unsqueeze_elimination.h"
@@ -117,11 +116,10 @@ std::vector<std::unique_ptr<GraphTransformer>> GeneratePreTrainingTransformers(
   ORT_THROW_IF_ERROR(rule_transformer->Register(std::make_unique<PythonOpRewriter>()));
 #endif

-  // Put ConstantSharing and ShapeInputMerge before CommonSubexpressionElimination by intention as it can create
-  // more opportunities for CSE. For example, if A and B nodes consume same different args but produce same output
-  // or consume different initializers with same value, by default, CSE will not merge them.
+  // Put ConstantSharing before CommonSubexpressionElimination by intention as it can create more opportunities for
+  // CSE. For example, if A and B nodes consume different initializers with same value, by default,
+  // CSE will not merge them.
   transformers.emplace_back(std::make_unique<ConstantSharing>(compatible_eps));
-  transformers.emplace_back(std::make_unique<ShapeInputMerge>(compatible_eps));
   // LayerNormFusion must be applied before CommonSubexpressionElimination as the latter will break the pattern when 2 LayerNormFusion share the same input.
   transformers.emplace_back(std::make_unique<LayerNormFusion>(compatible_eps));
   // Remove duplicate nodes. Must be applied before any recompute transformations.
