From acb665d666242d7ce3529dd1fa4e281eb5b419ec Mon Sep 17 00:00:00 2001 From: Vladimir Radosavljevic Date: Mon, 29 Jul 2024 12:38:32 +0200 Subject: [PATCH] [BranchFolding] Add an option to tail merge only bbs without successors One of the benefits of the Tail Merge optimization is to reduce code size by merging identical tails of BBs. In this case, if two BBs have the same ending instructions, one of the BBs will fall through to the tail BB and the other will use a jump instruction to reach it. If the BB where the jump is generated is frequently executed, this can hurt performance, at least for EraVM. On the other hand, for returning BBs we want to do tail merging, since after these BBs we return from the function and they are not frequently executed. Because of this, the -tail-merge-only-bbs-without-succ option is introduced to boost performance in such cases while still reducing code size by doing tail merging for returning BBs. PR: #667. Signed-off-by: Vladimir Radosavljevic --- llvm/lib/CodeGen/BranchFolding.cpp | 12 +++ ...nch-folding-tail-merge-bbs-without-succ.ll | 94 +++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 llvm/test/CodeGen/EraVM/branch-folding-tail-merge-bbs-without-succ.ll diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index fb7f1973f132..fad12322755a 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -86,6 +86,13 @@ TailMergeSize("tail-merge-size", cl::desc("Min number of instructions to consider tail merging"), cl::init(3), cl::Hidden); +// EraVM local begin +static cl::opt<bool> TailMergeOnlyBBsWithoutSucc( + "tail-merge-only-bbs-without-succ", + cl::desc("Tail merge only basic blocks without successors"), + cl::init(false), cl::Hidden); +// EraVM local end + namespace { /// BranchFolderPass - Wrap branch folder in a machine function pass.
@@ -1027,6 +1034,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (MergePotentials.size() >= 2) MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength); + // EraVM local begin + if (TailMergeOnlyBBsWithoutSucc) + return MadeChange; + // EraVM local end + // Look at blocks (IBB) with multiple predecessors (PBB). // We change each predecessor to a canonical form, by // (1) temporarily removing any unconditional branch from the predecessor diff --git a/llvm/test/CodeGen/EraVM/branch-folding-tail-merge-bbs-without-succ.ll b/llvm/test/CodeGen/EraVM/branch-folding-tail-merge-bbs-without-succ.ll new file mode 100644 index 000000000000..2e461d2ef12d --- /dev/null +++ b/llvm/test/CodeGen/EraVM/branch-folding-tail-merge-bbs-without-succ.ll @@ -0,0 +1,94 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -O3 < %s | FileCheck %s --check-prefix=TAIL-MERGE +; RUN: llc -O3 -tail-merge-only-bbs-without-succ=true < %s | FileCheck %s --check-prefix=TAIL-MERGE-NO-SUCC +; RUN: llc -O3 -enable-tail-merge=false < %s | FileCheck %s --check-prefix=NO-TAIL-MERGE + +target datalayout = "E-p:256:256-i256:256:256-S32-a:256:256" +target triple = "eravm" + +declare void @use() + +define i256 @test(i1 %cond1, i1 %cond2) { +; TAIL-MERGE-LABEL: test: +; TAIL-MERGE: ; %bb.0: ; %entry +; TAIL-MERGE-NEXT: sub! r1, r0, r0 +; TAIL-MERGE-NEXT: jump.eq @.BB0_4 +; TAIL-MERGE-NEXT: ; %bb.1: ; %bb1 +; TAIL-MERGE-NEXT: sub! r2, r0, r0 +; TAIL-MERGE-NEXT: jump.ne @.BB0_2 +; TAIL-MERGE-NEXT: .BB0_3: ; %bb3 +; TAIL-MERGE-NEXT: add 2, r0, r1 +; TAIL-MERGE-NEXT: stm.h 2, r1 +; TAIL-MERGE-NEXT: .BB0_4: ; %bb5 +; TAIL-MERGE-NEXT: add r0, r0, r1 +; TAIL-MERGE-NEXT: ret +; TAIL-MERGE-NEXT: .BB0_2: ; %bb2 +; TAIL-MERGE-NEXT: call r0, @use, @DEFAULT_UNWIND +; TAIL-MERGE-NEXT: jump @.BB0_3 +; +; TAIL-MERGE-NO-SUCC-LABEL: test: +; TAIL-MERGE-NO-SUCC: ; %bb.0: ; %entry +; TAIL-MERGE-NO-SUCC-NEXT: sub! 
r1, r0, r0 +; TAIL-MERGE-NO-SUCC-NEXT: jump.eq @.BB0_3 +; TAIL-MERGE-NO-SUCC-NEXT: ; %bb.1: ; %bb1 +; TAIL-MERGE-NO-SUCC-NEXT: sub! r2, r0, r0 +; TAIL-MERGE-NO-SUCC-NEXT: jump.ne @.BB0_4 +; TAIL-MERGE-NO-SUCC-NEXT: ; %bb.2: ; %bb3 +; TAIL-MERGE-NO-SUCC-NEXT: add 2, r0, r1 +; TAIL-MERGE-NO-SUCC-NEXT: stm.h 2, r1 +; TAIL-MERGE-NO-SUCC-NEXT: .BB0_3: ; %bb5 +; TAIL-MERGE-NO-SUCC-NEXT: add r0, r0, r1 +; TAIL-MERGE-NO-SUCC-NEXT: ret +; TAIL-MERGE-NO-SUCC-NEXT: .BB0_4: ; %bb2 +; TAIL-MERGE-NO-SUCC-NEXT: call r0, @use, @DEFAULT_UNWIND +; TAIL-MERGE-NO-SUCC-NEXT: add 2, r0, r1 +; TAIL-MERGE-NO-SUCC-NEXT: stm.h 2, r1 +; TAIL-MERGE-NO-SUCC-NEXT: add r0, r0, r1 +; TAIL-MERGE-NO-SUCC-NEXT: ret +; +; NO-TAIL-MERGE-LABEL: test: +; NO-TAIL-MERGE: ; %bb.0: ; %entry +; NO-TAIL-MERGE-NEXT: sub! r1, r0, r0 +; NO-TAIL-MERGE-NEXT: jump.eq @.BB0_3 +; NO-TAIL-MERGE-NEXT: ; %bb.1: ; %bb1 +; NO-TAIL-MERGE-NEXT: sub! r2, r0, r0 +; NO-TAIL-MERGE-NEXT: jump.ne @.BB0_4 +; NO-TAIL-MERGE-NEXT: ; %bb.2: ; %bb3 +; NO-TAIL-MERGE-NEXT: add 2, r0, r1 +; NO-TAIL-MERGE-NEXT: stm.h 2, r1 +; NO-TAIL-MERGE-NEXT: add r0, r0, r1 +; NO-TAIL-MERGE-NEXT: ret +; NO-TAIL-MERGE-NEXT: .BB0_3: ; %bb5 +; NO-TAIL-MERGE-NEXT: add r0, r0, r1 +; NO-TAIL-MERGE-NEXT: ret +; NO-TAIL-MERGE-NEXT: .BB0_4: ; %bb2 +; NO-TAIL-MERGE-NEXT: call r0, @use, @DEFAULT_UNWIND +; NO-TAIL-MERGE-NEXT: add 2, r0, r1 +; NO-TAIL-MERGE-NEXT: stm.h 2, r1 +; NO-TAIL-MERGE-NEXT: add r0, r0, r1 +; NO-TAIL-MERGE-NEXT: ret +entry: + br i1 %cond1, label %bb1, label %bb5 + +bb1: + br i1 %cond2, label %bb2, label %bb3, !prof !0 + +bb2: + call void @use() + store i256 2, ptr addrspace(1) inttoptr (i256 2 to ptr addrspace(1)), align 64 + br label %bb4 + +bb3: + store i256 2, ptr addrspace(1) inttoptr (i256 2 to ptr addrspace(1)), align 64 + br label %bb4 + +bb4: + ret i256 0 + +bb5: + ret i256 0 +} + +; Use branch_weights so that bb2 is placed at the end of the function, +; so a jump to bb3 is generated instead of a fallthrough.
+!0 = !{!"branch_weights", i64 1, i64 1000}