Skip to content

Commit

Permalink
[AMDGPU] Fix machine verification failure from INIT_EXEC lowering (#9…
Browse files Browse the repository at this point in the history
…8333)

Fix a machine verification failure in INIT_EXEC lowering that has been
present since the lowering was moved from SILowerControlFlow to
SIWholeQuadMode in #94452.
  • Loading branch information
jayfoad authored Jul 11, 2024
1 parent ce92b2f commit d4e46f0
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 1 deletion.
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1676,6 +1676,8 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
if (!(GlobalFlags & (StateWQM | StateStrict)) && LowerToCopyInstrs.empty() &&
LowerToMovInstrs.empty() && KillInstrs.empty()) {
lowerLiveMaskQueries();
if (!InitExecInstrs.empty())
LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
return !InitExecInstrs.empty() || !LiveMaskQueries.empty();
}

Expand Down Expand Up @@ -1717,7 +1719,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
LIS->removeAllRegUnitsForPhysReg(AMDGPU::SCC);

// If we performed any kills or have any INIT_EXEC instructions then recompute EXEC
if (!KillInstrs.empty())
if (!KillInstrs.empty() || !InitExecInstrs.empty())
LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);

return true;
Expand Down
75 changes: 75 additions & 0 deletions llvm/test/CodeGen/AMDGPU/wqm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3463,6 +3463,81 @@ bb:
ret void
}

; Test a case that failed machine verification: llvm.amdgcn.init.exec(0)
; followed by a three-way switch whose successor blocks all just return.
define amdgpu_gs void @wqm_init_exec_switch(i32 %arg) {
; GFX9-W64-LABEL: wqm_init_exec_switch:
; GFX9-W64: ; %bb.0:
; GFX9-W64-NEXT: s_mov_b64 exec, 0
; GFX9-W64-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
; GFX9-W64-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX9-W64-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
; GFX9-W64-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
; GFX9-W64-NEXT: s_endpgm
;
; GFX10-W32-LABEL: wqm_init_exec_switch:
; GFX10-W32: ; %bb.0:
; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-W32-NEXT: s_mov_b32 s0, exec_lo
; GFX10-W32-NEXT: v_cmpx_lt_i32_e32 0, v0
; GFX10-W32-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX10-W32-NEXT: s_andn2_saveexec_b32 s0, s0
; GFX10-W32-NEXT: s_endpgm
call void @llvm.amdgcn.init.exec(i64 0)
switch i32 %arg, label %bb1 [
i32 0, label %bb3
i32 1, label %bb2
]
bb1:
ret void
bb2:
ret void
bb3:
ret void
}

; Test llvm.amdgcn.init.exec(0) combined with a 64-bit ballot and an
; llvm.amdgcn.wwm call; both results are tested against zero, xor'ed together
; and the resulting float is exported via llvm.amdgcn.exp.f32.
define amdgpu_gs void @wqm_init_exec_wwm() {
; GFX9-W64-LABEL: wqm_init_exec_wwm:
; GFX9-W64: ; %bb.0:
; GFX9-W64-NEXT: s_mov_b64 exec, 0
; GFX9-W64-NEXT: s_mov_b32 s1, 0
; GFX9-W64-NEXT: s_mov_b32 s0, s1
; GFX9-W64-NEXT: s_cmp_lg_u64 exec, 0
; GFX9-W64-NEXT: s_cselect_b64 s[2:3], -1, 0
; GFX9-W64-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX9-W64-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9-W64-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX9-W64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1]
; GFX9-W64-NEXT: v_mov_b32_e32 v1, 0
; GFX9-W64-NEXT: exp mrt0 off, off, off, off
; GFX9-W64-NEXT: s_endpgm
;
; GFX10-W32-LABEL: wqm_init_exec_wwm:
; GFX10-W32: ; %bb.0:
; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
; GFX10-W32-NEXT: s_mov_b32 s1, 0
; GFX10-W32-NEXT: s_cmp_lg_u64 exec, 0
; GFX10-W32-NEXT: s_mov_b32 s0, s1
; GFX10-W32-NEXT: s_cselect_b32 s2, -1, 0
; GFX10-W32-NEXT: s_cmp_lg_u64 s[0:1], 0
; GFX10-W32-NEXT: v_mov_b32_e32 v1, 0
; GFX10-W32-NEXT: s_cselect_b32 s0, -1, 0
; GFX10-W32-NEXT: s_xor_b32 s0, s2, s0
; GFX10-W32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0
; GFX10-W32-NEXT: exp mrt0 off, off, off, off
; GFX10-W32-NEXT: s_endpgm
call void @llvm.amdgcn.init.exec(i64 0)
%i = call i64 @llvm.amdgcn.ballot.i64(i1 true)
%i1 = call i32 @llvm.amdgcn.wwm.i32(i32 0)
%i2 = insertelement <2 x i32> zeroinitializer, i32 %i1, i64 0
%i3 = bitcast <2 x i32> %i2 to i64
%i4 = icmp ne i64 %i, 0
%i5 = icmp ne i64 %i3, 0
%i6 = xor i1 %i4, %i5
%i7 = uitofp i1 %i6 to float
call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %i7, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
ret void
}

declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #1

Expand Down

0 comments on commit d4e46f0

Please sign in to comment.