Skip to content

Commit

Permalink
AMDGPU: Avoid creating unnecessary block split in atomic expansion (#102440)
Browse files Browse the repository at this point in the history

The atomic expansion was creating a new block just to hold the is.shared
check, but the check can simply be emitted at the end of the original block.
  • Loading branch information
arsenm authored Aug 8, 2024
1 parent 1248698 commit bb7143f
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 54 deletions.
8 changes: 0 additions & 8 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16609,9 +16609,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
//
// With this expansion we produce the following code:
// [...]
// br label %atomicrmw.check.shared
//
// atomicrmw.check.shared:
// %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %addr)
// br i1 %is.shared, label %atomicrmw.shared, label %atomicrmw.check.private
//
Expand Down Expand Up @@ -16654,8 +16651,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
Function *F = BB->getParent();
BasicBlock *ExitBB =
BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
BasicBlock *CheckSharedBB =
BasicBlock::Create(Ctx, "atomicrmw.check.shared", F, ExitBB);
BasicBlock *SharedBB = BasicBlock::Create(Ctx, "atomicrmw.shared", F, ExitBB);
BasicBlock *CheckPrivateBB =
BasicBlock::Create(Ctx, "atomicrmw.check.private", F, ExitBB);
Expand All @@ -16682,9 +16677,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {

std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
Builder.CreateBr(CheckSharedBB);

Builder.SetInsertPoint(CheckSharedBB);
CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {},
{Addr}, nullptr, "is.shared");
Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB);
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 {

define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX908-LABEL: syncscope_workgroup_nortn:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
Expand Down Expand Up @@ -272,7 +272,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: syncscope_workgroup_nortn:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
Expand Down Expand Up @@ -682,7 +682,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
Expand Down Expand Up @@ -839,7 +839,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -893,7 +893,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -1062,7 +1062,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
Expand Down Expand Up @@ -1116,7 +1116,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
Expand Down Expand Up @@ -1469,7 +1469,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -1525,7 +1525,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -2006,7 +2006,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -2060,7 +2060,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -2950,7 +2950,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
Expand Down Expand Up @@ -3002,7 +3002,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
Expand Down Expand Up @@ -3159,7 +3159,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -3213,7 +3213,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -3382,7 +3382,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
Expand Down Expand Up @@ -3436,7 +3436,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
Expand Down Expand Up @@ -3789,7 +3789,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -3845,7 +3845,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -4198,7 +4198,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -4254,7 +4254,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
Expand Down Expand Up @@ -5239,7 +5239,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
Expand Down Expand Up @@ -5291,7 +5291,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
; GFX908: ; %bb.0: ; %atomicrmw.check.shared
; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,6 @@ define i16 @test_cmpxchg_i16_global_agent_align4(ptr addrspace(1) %out, i16 %in,
define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX90A-LABEL: define void @syncscope_workgroup_nortn(
; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
; GFX90A: atomicrmw.check.shared:
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ define float @syncscope_system(ptr %addr, float %val) {
; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @syncscope_system(
; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
; GFX90A: atomicrmw.check.shared:
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
Expand All @@ -36,8 +34,8 @@ define float @syncscope_system(ptr %addr, float %val) {
; GFX90A: atomicrmw.private:
; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
Expand Down Expand Up @@ -94,8 +92,6 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) {
; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @syncscope_workgroup_rtn(
; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
; GFX90A: atomicrmw.check.shared:
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
Expand All @@ -108,8 +104,8 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) {
; GFX90A: atomicrmw.private:
; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
Expand Down Expand Up @@ -150,8 +146,6 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) {

define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX908-LABEL: @syncscope_workgroup_nortn(
; GFX908-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
; GFX908: atomicrmw.check.shared:
; GFX908-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
; GFX908-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX908: atomicrmw.shared:
Expand All @@ -164,8 +158,8 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX908: atomicrmw.private:
; GFX908-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
; GFX908-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
; GFX908-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
; GFX908-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
; GFX908-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
; GFX908: atomicrmw.global:
; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
Expand All @@ -178,8 +172,6 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX908-NEXT: ret void
;
; GFX90A-LABEL: @syncscope_workgroup_nortn(
; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
; GFX90A: atomicrmw.check.shared:
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
Expand All @@ -192,8 +184,8 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX90A: atomicrmw.private:
; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -595,8 +595,6 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(ptr %ptr, float %value) #0 {
; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe(
; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
; GFX90A: atomicrmw.check.shared:
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[PTR:%.*]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
Expand All @@ -609,8 +607,8 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(ptr %ptr, float %value) #0 {
; GFX90A: atomicrmw.private:
; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VALUE]]
; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VALUE]]
; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1)
Expand Down

0 comments on commit bb7143f

Please sign in to comment.