Skip to content

Commit

Permalink
[RISCV] Make X5 allocatable for JALR on CPUs without RAS
Browse files Browse the repository at this point in the history
Some microarchitectures may not have a RAS (Return Address Stack), in which
case we don't need to reserve the X5 register for JALR.

If RAS is supported, we will select the register allocation order
without X5 (because alternative orders should be subsets of the
default order).
  • Loading branch information
wangpc-pp committed Feb 5, 2024
1 parent b53169d commit f051f4a
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 48 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,9 @@ def FeatureFastUnalignedAccess
def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;

// Subtarget feature "no-ret-addr-stack": enabling it sets the subtarget field
// HasRetAddrStack to "false". Register classes used for indirect jumps query
// hasRetAddrStack() to decide whether X5 may be allocated.
def FeatureNoRAS : SubtargetFeature<"no-ret-addr-stack", "HasRetAddrStack", "false",
"Hasn't RAS (Return Address Stack)">;

def TuneNoOptimizedZeroStrideLoad
: SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
"false", "Hasn't optimized (perform fewer memory operations)"
Expand Down
20 changes: 17 additions & 3 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,13 @@ def GPRNoX0X2 : GPRRegisterClass<(sub GPR, X0, X2)>;
// stack on some microarchitectures. Also remove the reserved registers X0, X2,
// X3, and X4 as it reduces the number of register classes that get synthesized
// by tablegen.
def GPRJALR : GPRRegisterClass<(sub GPR, (sequence "X%u", 0, 5))>;
// If RAS is supported, we select the alternative register order without X5.
// Default allocation order: GPR minus X0-X4, so X5 is allocatable.
def GPRJALR : GPRRegisterClass<(sub GPR, (sequence "X%u", 0, 4))> {
// Single alternative order: same as the default but with X5 removed as well.
list<dag> AltOrders = [(sub GPR, (sequence "X%u", 0, 5))];
// Pick the alternative order (without X5) when the subtarget reports a return
// address stack; otherwise the default order, which includes X5, is used.
code AltOrderSelect = [{
return MF.getSubtarget<RISCVSubtarget>().hasRetAddrStack();
}];
}

def GPRC : GPRRegisterClass<(add (sequence "X%u", 10, 15),
(sequence "X%u", 8, 9))>;
Expand All @@ -162,9 +168,17 @@ def GPRC : GPRRegisterClass<(add (sequence "X%u", 10, 15),
// restored to the saved value before the tail call, which would clobber a call
// address. We shouldn't use x5 since that is a hint to pop the return
// address stack on some microarchitectures.
def GPRTC : GPRRegisterClass<(add (sequence "X%u", 6, 7),
// If RAS is supported, we select the alternative register order without X5.
def GPRTC : GPRRegisterClass<(add (sequence "X%u", 5, 7),
(sequence "X%u", 10, 17),
(sequence "X%u", 28, 31))>;
(sequence "X%u", 28, 31))> {
list<dag> AltOrders = [(add (sequence "X%u", 6, 7),
(sequence "X%u", 10, 17),
(sequence "X%u", 28, 31))];
code AltOrderSelect = [{
return MF.getSubtarget<RISCVSubtarget>().hasRetAddrStack();
}];
}

def SP : GPRRegisterClass<(add X2)>;

Expand Down
71 changes: 52 additions & 19 deletions llvm/test/CodeGen/RISCV/calls.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
; RUN: | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -relocation-model=pic -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV32I-PIC %s
; RUN: llc -mtriple=riscv32 -mattr=+no-ret-addr-stack -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,RV32I-NO-RAS %s

declare i32 @external_function(i32)

Expand Down Expand Up @@ -74,22 +76,56 @@ define i32 @test_call_indirect(ptr %a, i32 %b) nounwind {
; Make sure we don't use t0 as the source for jalr as that is a hint to pop the
; return address stack on some microarchitectures. With +no-ret-addr-stack,
; t0 is allowed as the source.
define i32 @test_call_indirect_no_t0(ptr %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) nounwind {
; CHECK-LABEL: test_call_indirect_no_t0:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; CHECK-NEXT: mv t1, a0
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: mv a3, a4
; CHECK-NEXT: mv a4, a5
; CHECK-NEXT: mv a5, a6
; CHECK-NEXT: mv a6, a7
; CHECK-NEXT: jalr t1
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; RV32I-LABEL: test_call_indirect_no_t0:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv t1, a0
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: mv a1, a2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: mv a4, a5
; RV32I-NEXT: mv a5, a6
; RV32I-NEXT: mv a6, a7
; RV32I-NEXT: jalr t1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32I-PIC-LABEL: test_call_indirect_no_t0:
; RV32I-PIC: # %bb.0:
; RV32I-PIC-NEXT: addi sp, sp, -16
; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-PIC-NEXT: mv t1, a0
; RV32I-PIC-NEXT: mv a0, a1
; RV32I-PIC-NEXT: mv a1, a2
; RV32I-PIC-NEXT: mv a2, a3
; RV32I-PIC-NEXT: mv a3, a4
; RV32I-PIC-NEXT: mv a4, a5
; RV32I-PIC-NEXT: mv a5, a6
; RV32I-PIC-NEXT: mv a6, a7
; RV32I-PIC-NEXT: jalr t1
; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-PIC-NEXT: addi sp, sp, 16
; RV32I-PIC-NEXT: ret
;
; RV32I-NO-RAS-LABEL: test_call_indirect_no_t0:
; RV32I-NO-RAS: # %bb.0:
; RV32I-NO-RAS-NEXT: addi sp, sp, -16
; RV32I-NO-RAS-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NO-RAS-NEXT: mv t0, a0
; RV32I-NO-RAS-NEXT: mv a0, a1
; RV32I-NO-RAS-NEXT: mv a1, a2
; RV32I-NO-RAS-NEXT: mv a2, a3
; RV32I-NO-RAS-NEXT: mv a3, a4
; RV32I-NO-RAS-NEXT: mv a4, a5
; RV32I-NO-RAS-NEXT: mv a5, a6
; RV32I-NO-RAS-NEXT: mv a6, a7
; RV32I-NO-RAS-NEXT: jalr t0
; RV32I-NO-RAS-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NO-RAS-NEXT: addi sp, sp, 16
; RV32I-NO-RAS-NEXT: ret
%1 = call i32 %a(i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
ret i32 %1
}
Expand Down Expand Up @@ -184,6 +220,3 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind {
i32 %a, i32 %a, i32 %a, i32 %a, i32 %a)
ret i32 %1
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32I: {{.*}}
; RV32I-PIC: {{.*}}
79 changes: 53 additions & 26 deletions llvm/test/CodeGen/RISCV/tail-calls.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s
; RUN: llc -mtriple riscv32-unknown-elf -o - %s | FileCheck %s
; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RAS
; RUN: llc -mtriple riscv32-unknown-elf -o - %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RAS
; RUN: llc -mtriple riscv32 -mattr=+no-ret-addr-stack -o - %s \
; RUN: | FileCheck --check-prefixes=CHECK,CHECK-NO-RAS %s

; Perform tail call optimization for global address.
declare i32 @callee_tail(i32 %i)
Expand Down Expand Up @@ -52,19 +56,29 @@ entry:
declare void @callee_indirect1()
declare void @callee_indirect2()
define void @caller_indirect_tail(i32 %a) nounwind {
; CHECK-LABEL: caller_indirect_tail:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: beqz a0, .LBB3_2
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: lui a0, %hi(callee_indirect2)
; CHECK-NEXT: addi t1, a0, %lo(callee_indirect2)
; CHECK-NEXT: jr t1
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: lui a0, %hi(callee_indirect1)
; CHECK-NEXT: addi t1, a0, %lo(callee_indirect1)
; CHECK-NEXT: jr t1


; CHECK-RAS-LABEL: caller_indirect_tail:
; CHECK-RAS: # %bb.0: # %entry
; CHECK-RAS-NEXT: beqz a0, .LBB3_2
; CHECK-RAS-NEXT: # %bb.1: # %entry
; CHECK-RAS-NEXT: lui a0, %hi(callee_indirect2)
; CHECK-RAS-NEXT: addi t1, a0, %lo(callee_indirect2)
; CHECK-RAS-NEXT: jr t1
; CHECK-RAS-NEXT: .LBB3_2:
; CHECK-RAS-NEXT: lui a0, %hi(callee_indirect1)
; CHECK-RAS-NEXT: addi t1, a0, %lo(callee_indirect1)
; CHECK-RAS-NEXT: jr t1
;
; CHECK-NO-RAS-LABEL: caller_indirect_tail:
; CHECK-NO-RAS: # %bb.0: # %entry
; CHECK-NO-RAS-NEXT: beqz a0, .LBB3_2
; CHECK-NO-RAS-NEXT: # %bb.1: # %entry
; CHECK-NO-RAS-NEXT: lui a0, %hi(callee_indirect2)
; CHECK-NO-RAS-NEXT: addi t0, a0, %lo(callee_indirect2)
; CHECK-NO-RAS-NEXT: jr t0
; CHECK-NO-RAS-NEXT: .LBB3_2:
; CHECK-NO-RAS-NEXT: lui a0, %hi(callee_indirect1)
; CHECK-NO-RAS-NEXT: addi t0, a0, %lo(callee_indirect1)
; CHECK-NO-RAS-NEXT: jr t0
entry:
%tobool = icmp eq i32 %a, 0
%callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2
Expand All @@ -75,17 +89,30 @@ entry:
; Make sure we don't use t0 as the source for jr as that is a hint to pop the
; return address stack on some microarchitectures. With +no-ret-addr-stack,
; t0 is allowed as the source.
define i32 @caller_indirect_no_t0(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
; CHECK-LABEL: caller_indirect_no_t0:
; CHECK: # %bb.0:
; CHECK-NEXT: mv t1, a0
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: mv a3, a4
; CHECK-NEXT: mv a4, a5
; CHECK-NEXT: mv a5, a6
; CHECK-NEXT: mv a6, a7
; CHECK-NEXT: jr t1
; CHECK-RAS-LABEL: caller_indirect_no_t0:
; CHECK-RAS: # %bb.0:
; CHECK-RAS-NEXT: mv t1, a0
; CHECK-RAS-NEXT: mv a0, a1
; CHECK-RAS-NEXT: mv a1, a2
; CHECK-RAS-NEXT: mv a2, a3
; CHECK-RAS-NEXT: mv a3, a4
; CHECK-RAS-NEXT: mv a4, a5
; CHECK-RAS-NEXT: mv a5, a6
; CHECK-RAS-NEXT: mv a6, a7
; CHECK-RAS-NEXT: jr t1
;
; CHECK-NO-RAS-LABEL: caller_indirect_no_t0:
; CHECK-NO-RAS: # %bb.0:
; CHECK-NO-RAS-NEXT: mv t0, a0
; CHECK-NO-RAS-NEXT: mv a0, a1
; CHECK-NO-RAS-NEXT: mv a1, a2
; CHECK-NO-RAS-NEXT: mv a2, a3
; CHECK-NO-RAS-NEXT: mv a3, a4
; CHECK-NO-RAS-NEXT: mv a4, a5
; CHECK-NO-RAS-NEXT: mv a5, a6
; CHECK-NO-RAS-NEXT: mv a6, a7
; CHECK-NO-RAS-NEXT: jr t0

%9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7)
ret i32 %9
}
Expand Down

0 comments on commit f051f4a

Please sign in to comment.