Skip to content

Commit

Permalink
[BranchFolding] Add an option to tail merge only bbs without successors
Browse files Browse the repository at this point in the history
One of the benefits of the Tail Merge optimization is that it
reduces code size by merging identical tails of BBs.
In this case, if two BBs have the same ending instructions,
one of the BBs will fall through to the tail BB and the
other will use a jump instruction to reach it. If the BB
where the jump is generated is frequently executed, this can
hurt performance, at least for EraVM. On the other hand,
for returning BBs we do want to tail merge, since after
these BBs we return from the function and they are
not frequently executed. Because of this, the
-tail-merge-only-bbs-without-succ option is introduced to
boost performance in such cases while still reducing code size by
doing tail merging for returning BBs.

PR: #667.

Signed-off-by: Vladimir Radosavljevic <[email protected]>
  • Loading branch information
vladimirradosavljevic committed Sep 24, 2024
1 parent a904d1e commit acb665d
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 0 deletions.
12 changes: 12 additions & 0 deletions llvm/lib/CodeGen/BranchFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,13 @@ TailMergeSize("tail-merge-size",
cl::desc("Min number of instructions to consider tail merging"),
cl::init(3), cl::Hidden);

// EraVM local begin
// Hidden command-line flag (-tail-merge-only-bbs-without-succ), off by
// default. When set, BranchFolder::TailMergeBlocks returns right after its
// first merge attempt and skips the later pass over blocks with multiple
// predecessors, so only basic blocks without successors are tail merged.
static cl::opt<bool> TailMergeOnlyBBsWithoutSucc(
"tail-merge-only-bbs-without-succ",
cl::desc("Tail merge only basic blocks without successors"),
cl::init(false), cl::Hidden);
// EraVM local end

namespace {

/// BranchFolderPass - Wrap branch folder in a machine function pass.
Expand Down Expand Up @@ -1027,6 +1034,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);

// EraVM local begin
if (TailMergeOnlyBBsWithoutSucc)
return MadeChange;
// EraVM local end

// Look at blocks (IBB) with multiple predecessors (PBB).
// We change each predecessor to a canonical form, by
// (1) temporarily removing any unconditional branch from the predecessor
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -O3 < %s | FileCheck %s --check-prefix=TAIL-MERGE
; RUN: llc -O3 -tail-merge-only-bbs-without-succ=true < %s | FileCheck %s --check-prefix=TAIL-MERGE-NO-SUCC
; RUN: llc -O3 -enable-tail-merge=false < %s | FileCheck %s --check-prefix=NO-TAIL-MERGE

target datalayout = "E-p:256:256-i256:256:256-S32-a:256:256"
target triple = "eravm"

declare void @use()

; Tail-merging test input: bb2 and bb3 end with the same store and both branch
; to bb4, while bb4 and bb5 are identical returning blocks. The three RUN lines
; compare default tail merging, merging restricted to blocks without
; successors, and tail merging disabled.
define i256 @test(i1 %cond1, i1 %cond2) {
; TAIL-MERGE-LABEL: test:
; TAIL-MERGE: ; %bb.0: ; %entry
; TAIL-MERGE-NEXT: sub! r1, r0, r0
; TAIL-MERGE-NEXT: jump.eq @.BB0_4
; TAIL-MERGE-NEXT: ; %bb.1: ; %bb1
; TAIL-MERGE-NEXT: sub! r2, r0, r0
; TAIL-MERGE-NEXT: jump.ne @.BB0_2
; TAIL-MERGE-NEXT: .BB0_3: ; %bb3
; TAIL-MERGE-NEXT: add 2, r0, r1
; TAIL-MERGE-NEXT: stm.h 2, r1
; TAIL-MERGE-NEXT: .BB0_4: ; %bb5
; TAIL-MERGE-NEXT: add r0, r0, r1
; TAIL-MERGE-NEXT: ret
; TAIL-MERGE-NEXT: .BB0_2: ; %bb2
; TAIL-MERGE-NEXT: call r0, @use, @DEFAULT_UNWIND
; TAIL-MERGE-NEXT: jump @.BB0_3
;
; TAIL-MERGE-NO-SUCC-LABEL: test:
; TAIL-MERGE-NO-SUCC: ; %bb.0: ; %entry
; TAIL-MERGE-NO-SUCC-NEXT: sub! r1, r0, r0
; TAIL-MERGE-NO-SUCC-NEXT: jump.eq @.BB0_3
; TAIL-MERGE-NO-SUCC-NEXT: ; %bb.1: ; %bb1
; TAIL-MERGE-NO-SUCC-NEXT: sub! r2, r0, r0
; TAIL-MERGE-NO-SUCC-NEXT: jump.ne @.BB0_4
; TAIL-MERGE-NO-SUCC-NEXT: ; %bb.2: ; %bb3
; TAIL-MERGE-NO-SUCC-NEXT: add 2, r0, r1
; TAIL-MERGE-NO-SUCC-NEXT: stm.h 2, r1
; TAIL-MERGE-NO-SUCC-NEXT: .BB0_3: ; %bb5
; TAIL-MERGE-NO-SUCC-NEXT: add r0, r0, r1
; TAIL-MERGE-NO-SUCC-NEXT: ret
; TAIL-MERGE-NO-SUCC-NEXT: .BB0_4: ; %bb2
; TAIL-MERGE-NO-SUCC-NEXT: call r0, @use, @DEFAULT_UNWIND
; TAIL-MERGE-NO-SUCC-NEXT: add 2, r0, r1
; TAIL-MERGE-NO-SUCC-NEXT: stm.h 2, r1
; TAIL-MERGE-NO-SUCC-NEXT: add r0, r0, r1
; TAIL-MERGE-NO-SUCC-NEXT: ret
;
; NO-TAIL-MERGE-LABEL: test:
; NO-TAIL-MERGE: ; %bb.0: ; %entry
; NO-TAIL-MERGE-NEXT: sub! r1, r0, r0
; NO-TAIL-MERGE-NEXT: jump.eq @.BB0_3
; NO-TAIL-MERGE-NEXT: ; %bb.1: ; %bb1
; NO-TAIL-MERGE-NEXT: sub! r2, r0, r0
; NO-TAIL-MERGE-NEXT: jump.ne @.BB0_4
; NO-TAIL-MERGE-NEXT: ; %bb.2: ; %bb3
; NO-TAIL-MERGE-NEXT: add 2, r0, r1
; NO-TAIL-MERGE-NEXT: stm.h 2, r1
; NO-TAIL-MERGE-NEXT: add r0, r0, r1
; NO-TAIL-MERGE-NEXT: ret
; NO-TAIL-MERGE-NEXT: .BB0_3: ; %bb5
; NO-TAIL-MERGE-NEXT: add r0, r0, r1
; NO-TAIL-MERGE-NEXT: ret
; NO-TAIL-MERGE-NEXT: .BB0_4: ; %bb2
; NO-TAIL-MERGE-NEXT: call r0, @use, @DEFAULT_UNWIND
; NO-TAIL-MERGE-NEXT: add 2, r0, r1
; NO-TAIL-MERGE-NEXT: stm.h 2, r1
; NO-TAIL-MERGE-NEXT: add r0, r0, r1
; NO-TAIL-MERGE-NEXT: ret
entry:
br i1 %cond1, label %bb1, label %bb5

bb1:
; The branch weights in !0 make bb2 the unlikely successor of this branch.
br i1 %cond2, label %bb2, label %bb3, !prof !0

bb2:
; Same store as bb3, preceded by a call, so bb2 and bb3 share a common tail.
call void @use()
store i256 2, ptr addrspace(1) inttoptr (i256 2 to ptr addrspace(1)), align 64
br label %bb4

bb3:
store i256 2, ptr addrspace(1) inttoptr (i256 2 to ptr addrspace(1)), align 64
br label %bb4

bb4:
; bb4 and bb5 are identical returning blocks with no successors.
ret i256 0

bb5:
ret i256 0
}

; Use branch_weights so that bb2 is placed at the end of the function,
; which makes it reach bb3 via a jump instead of a fallthrough.
!0 = !{!"branch_weights", i64 1, i64 1000}

0 comments on commit acb665d

Please sign in to comment.