Skip to content

Commit

Permalink
AMDGPU: Do not generate non-temporal hint when Load_Tr intrinsic did …
Browse files Browse the repository at this point in the history
…not specify it (#79104)

int_amdgcn_global_load_tr does not specify a non-temporal load transpose,
so we should not generate the non-temporal hint for the load. We need to
implement getTgtMemIntrinsic to create the corresponding MemSDNode, and we
do not set the non-temporal flag there because the intrinsic did not
specify it.

NOTE: We need to implement getTgtMemIntrinsic for every memory intrinsic.
  • Loading branch information
changpeng authored Jan 23, 2024
1 parent 55a7bb0 commit 32073b8
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 32 deletions.
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,14 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_global_load_tr: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad;
return true;
}
case Intrinsic::amdgcn_ds_gws_init:
case Intrinsic::amdgcn_ds_gws_barrier:
case Intrinsic::amdgcn_ds_gws_sema_v:
Expand Down Expand Up @@ -1407,6 +1415,7 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
SmallVectorImpl<Value*> &Ops,
Type *&AccessTy) const {
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_global_load_tr:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_append:
Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v2, 0
; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-SDAG-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32
; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
; GFX12-SDAG-W32-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-SDAG-W32-NEXT: s_nop 0
; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -26,9 +25,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v2, 0
; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-GISEL-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32
; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
; GFX12-GISEL-W32-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-GISEL-W32-NEXT: s_nop 0
; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -46,9 +44,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v4, 0
; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
; GFX12-SDAG-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
; GFX12-SDAG-W32-NEXT: s_nop 0
; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -59,9 +56,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v4, 0
; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
; GFX12-GISEL-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
; GFX12-GISEL-W32-NEXT: s_nop 0
; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -79,9 +75,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v4, 0
; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
; GFX12-SDAG-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
; GFX12-SDAG-W32-NEXT: s_nop 0
; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -92,9 +87,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v4, 0
; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
; GFX12-GISEL-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
; GFX12-GISEL-W32-NEXT: s_nop 0
; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -112,9 +106,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v4, 0
; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
; GFX12-SDAG-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
; GFX12-SDAG-W32-NEXT: s_nop 0
; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -125,9 +118,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v4, 0
; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
; GFX12-GISEL-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
; GFX12-GISEL-W32-NEXT: s_nop 0
; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
; GFX12-SDAG-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-SDAG-W64-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-W64-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-W64-NEXT: global_load_tr_b64 v1, v0, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-SDAG-W64-NEXT: global_load_tr_b64 v1, v0, s[0:1] offset:32
; GFX12-SDAG-W64-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-W64-NEXT: global_inv scope:SCOPE_SYS
; GFX12-SDAG-W64-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX12-SDAG-W64-NEXT: s_nop 0
; GFX12-SDAG-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -26,9 +25,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
; GFX12-GISEL-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-GISEL-W64-NEXT: v_mov_b32_e32 v0, 0
; GFX12-GISEL-W64-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-W64-NEXT: global_load_tr_b64 v1, v0, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-GISEL-W64-NEXT: global_load_tr_b64 v1, v0, s[0:1] offset:32
; GFX12-GISEL-W64-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-W64-NEXT: global_inv scope:SCOPE_SYS
; GFX12-GISEL-W64-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX12-GISEL-W64-NEXT: s_nop 0
; GFX12-GISEL-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -46,9 +44,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
; GFX12-SDAG-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-SDAG-W64-NEXT: v_mov_b32_e32 v2, 0
; GFX12-SDAG-W64-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
; GFX12-SDAG-W64-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-W64-NEXT: global_inv scope:SCOPE_SYS
; GFX12-SDAG-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-SDAG-W64-NEXT: s_nop 0
; GFX12-SDAG-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -59,9 +56,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
; GFX12-GISEL-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-GISEL-W64-NEXT: v_mov_b32_e32 v2, 0
; GFX12-GISEL-W64-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
; GFX12-GISEL-W64-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-W64-NEXT: global_inv scope:SCOPE_SYS
; GFX12-GISEL-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-GISEL-W64-NEXT: s_nop 0
; GFX12-GISEL-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -79,9 +75,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
; GFX12-SDAG-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-SDAG-W64-NEXT: v_mov_b32_e32 v2, 0
; GFX12-SDAG-W64-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
; GFX12-SDAG-W64-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-W64-NEXT: global_inv scope:SCOPE_SYS
; GFX12-SDAG-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-SDAG-W64-NEXT: s_nop 0
; GFX12-SDAG-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -92,9 +87,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
; GFX12-GISEL-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-GISEL-W64-NEXT: v_mov_b32_e32 v2, 0
; GFX12-GISEL-W64-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
; GFX12-GISEL-W64-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-W64-NEXT: global_inv scope:SCOPE_SYS
; GFX12-GISEL-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-GISEL-W64-NEXT: s_nop 0
; GFX12-GISEL-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -112,9 +106,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
; GFX12-SDAG-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-SDAG-W64-NEXT: v_mov_b32_e32 v2, 0
; GFX12-SDAG-W64-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-SDAG-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
; GFX12-SDAG-W64-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-W64-NEXT: global_inv scope:SCOPE_SYS
; GFX12-SDAG-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-SDAG-W64-NEXT: s_nop 0
; GFX12-SDAG-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -125,9 +118,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
; GFX12-GISEL-W64-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-GISEL-W64-NEXT: v_mov_b32_e32 v2, 0
; GFX12-GISEL-W64-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
; GFX12-GISEL-W64-NEXT: global_load_tr_b128 v[0:1], v2, s[0:1] offset:32
; GFX12-GISEL-W64-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-W64-NEXT: global_inv scope:SCOPE_SYS
; GFX12-GISEL-W64-NEXT: global_store_b64 v2, v[0:1], s[2:3]
; GFX12-GISEL-W64-NEXT: s_nop 0
; GFX12-GISEL-W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand Down

0 comments on commit 32073b8

Please sign in to comment.