Skip to content

Commit

Permalink
[WAW reg] ignore non vector and accumulator registers
Browse files Browse the repository at this point in the history
  • Loading branch information
F-Stuckmann committed Oct 1, 2024
1 parent 271cff1 commit 6f05e8b
Show file tree
Hide file tree
Showing 14 changed files with 119 additions and 42 deletions.
27 changes: 27 additions & 0 deletions llvm/lib/Target/AIE/AIE2RegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,30 @@ AIE2RegisterInfo::getCoveringSubRegs(const TargetRegisterClass &RC) const {
}
return Subregs;
}

bool AIE2RegisterInfo::isVecOrAccRegClass(const TargetRegisterClass &RC) const {
// ******** Vector classes ********
if (AIE2::VEC128RegClass.hasSubClassEq(&RC))
return true;

if (AIE2::VEC256RegClass.hasSubClassEq(&RC))
return true;

if (AIE2::VEC512RegClass.hasSubClassEq(&RC))
return true;

if (AIE2::VEC1024RegClass.hasSubClassEq(&RC))
return true;

// ******** Accumulator classes ********
if (AIE2::ACC256RegClass.hasSubClassEq(&RC))
return true;

if (AIE2::ACC512RegClass.hasSubClassEq(&RC))
return true;

if (AIE2::ACC1024RegClass.hasSubClassEq(&RC))
return true;

return false;
}
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/AIE2RegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ struct AIE2RegisterInfo : public AIE2GenRegisterInfo {
/// Returns the register class this target reports for 3D iterators; for AIE2
/// that is always AIE2::eDSRegClass.
const TargetRegisterClass *get3DIteratorRegClass() const override {
return &AIE2::eDSRegClass;
}
/// AIE2 override of the base-class hook: reports whether \p RC is a vector or
/// accumulator register class. Declared here only; the body is out of line.
bool isVecOrAccRegClass(const TargetRegisterClass &RC) const override;
};
} // namespace llvm

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ struct AIEBaseRegisterInfo : public TargetRegisterInfo {
/// Override that unconditionally answers false, i.e. no physical register is
/// reported as a simplifiable reserved register, whatever \p PhysReg is.
bool isSimplifiableReservedReg(MCRegister PhysReg) const override {
return false;
}

/// Hook asking whether \p RC is a vector or accumulator register class.
///
/// The base implementation provides no answer: it reaches llvm_unreachable,
/// so every target on which this query is made has to override it.
virtual bool isVecOrAccRegClass(const TargetRegisterClass &RC) const {
llvm_unreachable("Target didn't implement isVecOrAccRegClass()");
}

#if 0
/// Returns a BitVector of the intersection of GPR RegClass
/// and CalleeSaved Registers
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AIE/AIEWawRegRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,9 @@ bool AIEWawRegRewriter::isWorthRenaming(const Register &Reg,
if (!UsedPhysRegs[VRM->getPhys(Reg)])
return false;

if (!TRI->isVecOrAccRegClass(*(MRI->getRegClass(Reg))))
return false;

return !VRegWithCopies[Reg.virtRegIndex()];
}

Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-dyn-stackalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,19 @@ define void @test_loop_dyn_alloca(i32 noundef %n) {
; CHECK-NEXT: nopa ; nopx ; mov p6, sp
; CHECK-NEXT: mov p1, sp
; CHECK-NEXT: lshl r0, r17, r19
; CHECK-NEXT: add r1, r0, #31
; CHECK-NEXT: add r0, r0, #31
; CHECK-NEXT: jl #extern_call
; CHECK-NEXT: mov p0, p1 // Delay Slot 5
; CHECK-NEXT: and r2, r1, r20 // Delay Slot 4
; CHECK-NEXT: mov m0, r2 // Delay Slot 3
; CHECK-NEXT: and r0, r0, r20 // Delay Slot 4
; CHECK-NEXT: mov m0, r0 // Delay Slot 3
; CHECK-NEXT: paddb [p1], m0 // Delay Slot 2
; CHECK-NEXT: mov sp, p1 // Delay Slot 1
; CHECK-NEXT: nopa ; nopb ; add r17, r17, #1; nopm ; nops
; CHECK-NEXT: ltu r3, r17, r16
; CHECK-NEXT: xor r4, r17, r18
; CHECK-NEXT: add r21, r21, r3
; CHECK-NEXT: or r5, r4, r21
; CHECK-NEXT: jnz r5, #.LBB1_1
; CHECK-NEXT: ltu r0, r17, r16
; CHECK-NEXT: add r21, r21, r0
; CHECK-NEXT: xor r0, r17, r18
; CHECK-NEXT: or r0, r0, r21
; CHECK-NEXT: jnz r0, #.LBB1_1
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AIE/aie2/dyn-stackalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,19 @@ define void @test_loop_dyn_alloca(i32 noundef %n) {
; CHECK-NEXT: nopa ; nopx ; mov p6, sp
; CHECK-NEXT: mov p1, sp
; CHECK-NEXT: lshl r0, r17, r19
; CHECK-NEXT: add r1, r0, #31
; CHECK-NEXT: add r0, r0, #31
; CHECK-NEXT: jl #extern_call
; CHECK-NEXT: mov p0, p1 // Delay Slot 5
; CHECK-NEXT: and r2, r1, r20 // Delay Slot 4
; CHECK-NEXT: mov m0, r2 // Delay Slot 3
; CHECK-NEXT: and r0, r0, r20 // Delay Slot 4
; CHECK-NEXT: mov m0, r0 // Delay Slot 3
; CHECK-NEXT: paddb [p1], m0 // Delay Slot 2
; CHECK-NEXT: mov sp, p1 // Delay Slot 1
; CHECK-NEXT: nopa ; nopb ; add r17, r17, #1; nopm ; nops
; CHECK-NEXT: ltu r3, r17, r16
; CHECK-NEXT: xor r4, r17, r18
; CHECK-NEXT: add r21, r21, r3
; CHECK-NEXT: or r5, r4, r21
; CHECK-NEXT: jnz r5, #.LBB1_1
; CHECK-NEXT: ltu r0, r17, r16
; CHECK-NEXT: add r21, r21, r0
; CHECK-NEXT: xor r0, r17, r18
; CHECK-NEXT: or r0, r0, r21
; CHECK-NEXT: jnz r0, #.LBB1_1
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
Expand Down
16 changes: 7 additions & 9 deletions llvm/test/CodeGen/AIE/aie2/end-to-end/Conv2D-red.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ASM-LABEL: conv2d.loop.nest:
; ASM: .p2align 4
; ASM-NEXT: // %bb.0: // %newFuncRoot
; ASM-NEXT: mov r29, r16
; ASM-NEXT: mov s0, r0
; ASM-NEXT: nopx ; mov s0, r0
; ASM-NEXT: mov s1, r1
; ASM-NEXT: paddb [sp], #32; mov s2, r6
; ASM-NEXT: mova dj3, #0; st p7, [sp, #-32] // 4-byte Folded Spill
Expand Down Expand Up @@ -125,7 +124,7 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ASM-NEXT: vlda.ups.s32.s16 bmh6, s0, [p2, #32]
; ASM-NEXT: vlda.ups.s32.s16 bml6, s0, [p2], m1; mov r0, p0
; ASM-NEXT: vlda.ups.s32.s16 bmh7, s0, [p2, #32]; and r0, r0, r9
; ASM-NEXT: vlda.ups.s32.s16 bml7, s0, [p2, #0]; add r16, r0, #33; mov r0, r5
; ASM-NEXT: vlda.ups.s32.s16 bml7, s0, [p2, #0]; add r1, r0, #33; mov r0, r5
; ASM-NEXT: .p2align 4
; ASM-NEXT: .LBB0_2: // %inner.loop
; ASM-NEXT: // Parent Loop BB0_1 Depth=1
Expand All @@ -139,8 +138,8 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ASM-NEXT: vldb wh10, [p1], #32
; ASM-NEXT: vldb wl7, [p1], #32
; ASM-NEXT: vldb wh7, [p1], #32
; ASM-NEXT: vshift.align x4, x4, s1, x6, r16
; ASM-NEXT: vshift.align x2, x2, s1, x8, r16
; ASM-NEXT: vshift.align x4, x4, s1, x6, r1
; ASM-NEXT: vshift.align x2, x2, s1, x8, r1
; ASM-NEXT: vshuffle x9, x4, x2, r2
; ASM-NEXT: vshuffle x3, x4, x2, r3
; ASM-NEXT: vmac cm0, cm0, x9, x10, r4
Expand All @@ -150,7 +149,7 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ASM-NEXT: vmac cm1, cm1, x1, x10, r4 // Delay Slot 4
; ASM-NEXT: mov r1, p0; vmac cm3, cm3, x5, x10, r4 // Delay Slot 3
; ASM-NEXT: and r1, r1, r9; vmac cm5, cm5, x1, x7, r4 // Delay Slot 2
; ASM-NEXT: add r16, r1, #33; vmac cm7, cm7, x5, x7, r4 // Delay Slot 1
; ASM-NEXT: add r1, r1, #33; vmac cm7, cm7, x5, x7, r4 // Delay Slot 1
; ASM-NEXT: // %bb.3: // %outer.loop.latch
; ASM-NEXT: // in Loop: Header=BB0_1 Depth=1
; ASM-NEXT: nopa ; nopb ; nopx ; mov s3, r6; vst.srs.s16.s32 bmh0, s2, [p3, #32]
Expand All @@ -177,15 +176,14 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ASM-NEXT: padda.3d [p1], d2; paddb [p2], m1; mov m3, r14 // Delay Slot 2
; ASM-NEXT: padda.3d [p2], d3; mov r25, dc5 // Delay Slot 1
; ASM-NEXT: // %bb.4: // %exitStub
; ASM-NEXT: lda p7, [sp, #-32]; nopb ; nopx // 4-byte Folded Reload
; ASM-NEXT: lda p7, [sp, #-32]; nopb ; nopxm // 4-byte Folded Reload
; ASM-NEXT: lda p6, [sp, #-28] // 4-byte Folded Reload
; ASM-NEXT: nop
; ASM-NEXT: ret lr
; ASM-NEXT: nop // Delay Slot 5
; ASM-NEXT: nop // Delay Slot 4
; ASM-NEXT: nop // Delay Slot 3
; ASM-NEXT: nop // Delay Slot 2
; ASM-NEXT: paddb [sp], #-32; mov r16, r29 // Delay Slot 1
; ASM-NEXT: paddb [sp], #-32 // Delay Slot 1
newFuncRoot:
br label %outer.loop.header

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/nested.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ define void @nested(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: // => This Inner Loop Header: Depth=2
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r7, r6, r4; nopm ; nopv
; CHECK-NEXT: mov dj0, r7
; CHECK-NEXT: lda r8, [p3, dj0]
; CHECK-NEXT: lda r7, [p3, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: jnzd r5, r5, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: add r6, r6, #1 // Delay Slot 3
; CHECK-NEXT: add r2, r2, r8 // Delay Slot 2
; CHECK-NEXT: add r2, r2, r7 // Delay Slot 2
; CHECK-NEXT: st r2, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.2: // %for.cond3.for.cond.cleanup5_crit_edge
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/sibling.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ define void @sibling(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r6, r5, r4; nopm ; nopv
; CHECK-NEXT: mov dj0, r6
; CHECK-NEXT: lda r7, [p1, dj0]
; CHECK-NEXT: lda r6, [p1, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: jnzd r0, r0, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: add r5, r5, #1 // Delay Slot 3
; CHECK-NEXT: add r3, r3, r7 // Delay Slot 2
; CHECK-NEXT: add r3, r3, r6 // Delay Slot 2
; CHECK-NEXT: st r3, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.2: // %for.body6.lr.ph
Expand All @@ -43,14 +43,14 @@ define void @sibling(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r4, r2, r3; nopm ; nopv
; CHECK-NEXT: mov dj0, r4
; CHECK-NEXT: lda r5, [p1, dj0]
; CHECK-NEXT: lda r4, [p1, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: jnzd r1, r1, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: add r2, r2, #1 // Delay Slot 3
; CHECK-NEXT: add r0, r0, r5 // Delay Slot 2
; CHECK-NEXT: add r0, r0, r4 // Delay Slot 2
; CHECK-NEXT: st r0, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.4: // %for.cond.cleanup5
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/simple.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ define void @simple(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r4, r2, r3; nopm ; nopv
; CHECK-NEXT: mov dj0, r4
; CHECK-NEXT: lda r5, [p1, dj0]
; CHECK-NEXT: lda r4, [p1, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: jnzd r0, r0, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: add r2, r2, #1 // Delay Slot 3
; CHECK-NEXT: add r1, r1, r5 // Delay Slot 2
; CHECK-NEXT: add r1, r1, r4 // Delay Slot 2
; CHECK-NEXT: st r1, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/unknown-tc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ define void @cbz_exit(ptr %in, ptr %res) {
; CHECK-NEXT: nopa ; nopb ; add r0, r0, #1
; CHECK-NEXT: lshl r2, r0, r1
; CHECK-NEXT: mov dj0, r2
; CHECK-NEXT: lda r3, [p0, dj0]
; CHECK-NEXT: lda r2, [p0, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: jnz r3, #.LBB0_1
; CHECK-NEXT: jnz r2, #.LBB0_1
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
Expand Down Expand Up @@ -67,14 +67,14 @@ define void @cbnz_exit(ptr %in, ptr %res) {
; CHECK-NEXT: nopa ; nopb ; add r0, r0, #1
; CHECK-NEXT: lshl r2, r0, r1
; CHECK-NEXT: mov dj0, r2
; CHECK-NEXT: lda r3, [p0, dj0]
; CHECK-NEXT: lda r2, [p0, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: jz r3, #.LBB1_1
; CHECK-NEXT: jz r2, #.LBB1_1
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/zol-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ define void @simple_loop(i32 noundef %n, ptr nocapture readonly %in, ptr nocaptu
; CHECK-NEXT: nop
; CHECK-NEXT: lshl r4, r1, r0
; CHECK-NEXT: add r1, r1, #1
; CHECK-NEXT: add r5, r2, r3; mov dj0, r4
; CHECK-NEXT: add r3, r2, r3; mov dj0, r4
; CHECK-NEXT: .L_LEnd0:
; CHECK-NEXT: nopb ; nopa ; st r5, [p1, dj0]; add r2, r2, #-1; nopm ; nopv
; CHECK-NEXT: nopb ; nopa ; st r3, [p1, dj0]; add r2, r2, #-1; nopm ; nopv
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_3: // %for.cond.cleanup
; CHECK-NEXT: nopa ; ret lr
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AIE/aie2/loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,20 @@ define i32 @accumulate(i32 %size, ptr %array) {
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lshl r3, r2, r1
; CHECK-NEXT: mov dj0, r3
; CHECK-NEXT: lda r5, [p0, dj0]
; CHECK-NEXT: lda r3, [p0, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: add r2, r2, #1
; CHECK-NEXT: eq r4, r2, r5
; CHECK-NEXT: eq r4, r2, r3
; CHECK-NEXT: jz r4, #.LBB0_2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: add r0, r5, r0 // Delay Slot 1
; CHECK-NEXT: add r0, r3, r0 // Delay Slot 1
; CHECK-NEXT: // %bb.3: // %for.cond.cleanup
; CHECK-NEXT: nopa ; ret lr
; CHECK-NEXT: nop // Delay Slot 5
Expand Down
43 changes: 43 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/ra/waw_reg_renaming_loop.mir
Original file line number Diff line number Diff line change
Expand Up @@ -508,3 +508,46 @@ body: |
liveins: $p0, $d0, $r25, $r26, $x10, $dj0, $s0
PseudoRET implicit $lr
...

# General purpose registers are neither vector nor accumulator registers, so
# the WAW register rewriter must leave them unrenamed.
---
name: gpr_replacement
alignment: 16
legalized: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: gpr_replacement
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $r0, $r1, $r2, $r8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: LoopStart $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK-NEXT: liveins: $r1, $r2, $r8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $r0 = AND $r1, $r2
; CHECK-NEXT: renamable $r3 = AND $r1, $r8
; CHECK-NEXT: renamable $r0 = AND killed renamable $r0, renamable $r3
; CHECK-NEXT: dead renamable $r0 = AND killed renamable $r3, killed renamable $r0
; CHECK-NEXT: PseudoLoopEnd <mcsymbol .L_1120>, %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: PseudoRET implicit $lr
bb.0.entry:
successors: %bb.1
liveins: $r0, $r1, $r2, $r8
LoopStart $r0
bb.1:
successors: %bb.1, %bb.2
liveins: $r1, $r2, $r8
%0:er = AND $r1, $r2
%1:er = AND $r1, $r8
%2:er = AND %0, %1
%3:er = AND %1, %2
PseudoLoopEnd <mcsymbol .L_1120>, %bb.1
bb.2:
PseudoRET implicit $lr
...

0 comments on commit 6f05e8b

Please sign in to comment.