diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3ae33484a44b25..6a8c93f4b92871 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2927,12 +2927,20 @@ SDValue SITargetLowering::LowerFormalArguments( DL, Elts); } - SDValue CMemVT; - if (VT.isScalarInteger() && VT.bitsLT(NewArg.getSimpleValueType())) - CMemVT = DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewArg); - else - CMemVT = DAG.getBitcast(MemVT, NewArg); - NewArg = convertArgType(DAG, VT, MemVT, DL, CMemVT, + // If the argument was preloaded to multiple consecutive 32-bit + // registers because of misalignment between addressable SGPR tuples + // and the argument size, we can still assume, because of kernarg + // segment alignment restrictions, that NewArg's size is the same as + // MemVT and just do a bitcast. If MemVT is less than 32 bits we add a + // truncate since we cannot preload to less than a single SGPR and the + // MemVT may be smaller. 
+ EVT MemVTInt = + EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); + if (MemVT.bitsLT(NewArg.getSimpleValueType())) + NewArg = DAG.getNode(ISD::TRUNCATE, DL, MemVTInt, NewArg); + + NewArg = DAG.getBitcast(MemVT, NewArg); + NewArg = convertArgType(DAG, VT, MemVT, DL, NewArg, Ins[i].Flags.isSExt(), &Ins[i]); NewArg = DAG.getMergeValues({NewArg, Chain}, DL); } diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll index f0e709b5a17279..857bb897ead2a3 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll @@ -1,18 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-NO-PRELOAD %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-1 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-2 %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-4 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-PRELOAD-8 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-NO-PRELOAD %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-1 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=2 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-2 %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a 
-amdgpu-kernarg-preload-count=4 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-4 %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -amdgpu-kernarg-preload-count=8 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a-PRELOAD-8 %s -define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) { -; GFX940-NO-PRELOAD-LABEL: ptr1_i8: +define amdgpu_kernel void @ptr1_i8_kernel_preload_arg(ptr addrspace(1) %out, i8 %arg0) { +; GFX940-NO-PRELOAD-LABEL: ptr1_i8_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 @@ -23,19 +19,7 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) { ; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: ptr1_i8: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: ptr1_i8: +; GFX940-PRELOAD-2-LABEL: ptr1_i8_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -45,17 +29,7 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) { ; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: ptr1_i8: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xff -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: ptr1_i8: +; GFX940-PRELOAD-8-LABEL: ptr1_i8_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -65,7 +39,7 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) { ; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: ptr1_i8: +; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -76,19 +50,7 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) { ; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: ptr1_i8: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: ptr1_i8: +; GFX90a-PRELOAD-2-LABEL: ptr1_i8_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -98,17 +60,7 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) { ; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: ptr1_i8: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: s_and_b32 s0, s8, 0xff -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: ptr1_i8: +; GFX90a-PRELOAD-8-LABEL: ptr1_i8_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -122,8 +74,8 @@ define amdgpu_kernel void @ptr1_i8(ptr addrspace(1) %out, i8 %arg0) { ret void } -define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %arg0) { -; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_arg: +define amdgpu_kernel void @ptr1_i8_zext_kernel_preload_arg(ptr addrspace(1) %out, i8 zeroext %arg0) { +; GFX940-NO-PRELOAD-LABEL: ptr1_i8_zext_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 @@ -134,19 +86,7 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a ; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: ptr1_i8_zext_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: ptr1_i8_zext_arg: +; GFX940-PRELOAD-2-LABEL: ptr1_i8_zext_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -157,18 +97,7 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a ; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: ptr1_i8_zext_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: s_mov_b32 s0, 0xffff -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-4-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: ptr1_i8_zext_arg: +; GFX940-PRELOAD-8-LABEL: ptr1_i8_zext_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -179,7 +108,7 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a ; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_zext_arg: +; GFX90a-NO-PRELOAD-LABEL: ptr1_i8_zext_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -190,19 +119,7 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a ; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: ptr1_i8_zext_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xff -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: ptr1_i8_zext_arg: +; GFX90a-PRELOAD-2-LABEL: ptr1_i8_zext_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -213,18 +130,7 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a ; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: ptr1_i8_zext_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: s_mov_b32 s0, 0xffff -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s8 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-4-NEXT: v_and_b32_sdwa v1, s0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: ptr1_i8_zext_arg: +; GFX90a-PRELOAD-8-LABEL: ptr1_i8_zext_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -239,8 +145,8 @@ define amdgpu_kernel void @ptr1_i8_zext_arg(ptr addrspace(1) %out, i8 zeroext %a ret void } -define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0) { -; GFX940-NO-PRELOAD-LABEL: ptr1_i16_preload_arg: +define amdgpu_kernel void @ptr1_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0) { +; GFX940-NO-PRELOAD-LABEL: ptr1_i16_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 @@ -251,19 +157,7 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0 ; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: ptr1_i16_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: ptr1_i16_preload_arg: +; GFX940-PRELOAD-2-LABEL: ptr1_i16_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -273,17 +167,7 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0 ; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: ptr1_i16_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: s_and_b32 s0, s4, 0xffff -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: ptr1_i16_preload_arg: +; GFX940-PRELOAD-8-LABEL: ptr1_i16_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -293,7 +177,7 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0 ; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: ptr1_i16_preload_arg: +; GFX90a-NO-PRELOAD-LABEL: ptr1_i16_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -304,19 +188,7 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0 ; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: ptr1_i16_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. 
Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: ptr1_i16_preload_arg: +; GFX90a-PRELOAD-2-LABEL: ptr1_i16_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -326,17 +198,7 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0 ; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: ptr1_i16_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: s_and_b32 s0, s8, 0xffff -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: ptr1_i16_preload_arg: +; GFX90a-PRELOAD-8-LABEL: ptr1_i16_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -350,8 +212,8 @@ define amdgpu_kernel void @ptr1_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0 ret void } -define amdgpu_kernel void @ptr1_i32_preload_arg(ptr addrspace(1) %out, i32 %arg0) { -; GFX940-NO-PRELOAD-LABEL: ptr1_i32_preload_arg: +define amdgpu_kernel void @ptr1_i32_kernel_preload_arg(ptr addrspace(1) %out, i32 %arg0) { +; GFX940-NO-PRELOAD-LABEL: ptr1_i32_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 @@ -361,18 +223,7 @@ define amdgpu_kernel void @ptr1_i32_preload_arg(ptr addrspace(1) %out, i32 %arg0 ; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: ptr1_i32_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: ptr1_i32_preload_arg: +; GFX940-PRELOAD-2-LABEL: ptr1_i32_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -381,16 +232,7 @@ define amdgpu_kernel void @ptr1_i32_preload_arg(ptr addrspace(1) %out, i32 %arg0 ; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: ptr1_i32_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: ptr1_i32_preload_arg: +; GFX940-PRELOAD-8-LABEL: ptr1_i32_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -399,7 +241,7 @@ define amdgpu_kernel void @ptr1_i32_preload_arg(ptr addrspace(1) %out, i32 %arg0 ; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: ptr1_i32_preload_arg: +; GFX90a-NO-PRELOAD-LABEL: ptr1_i32_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -409,18 +251,7 @@ define amdgpu_kernel void @ptr1_i32_preload_arg(ptr addrspace(1) %out, i32 %arg0 ; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: ptr1_i32_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: ptr1_i32_preload_arg: +; GFX90a-PRELOAD-2-LABEL: ptr1_i32_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -429,16 +260,7 @@ define amdgpu_kernel void @ptr1_i32_preload_arg(ptr addrspace(1) %out, i32 %arg0 ; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: ptr1_i32_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s8 -; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: ptr1_i32_preload_arg: +; GFX90a-PRELOAD-8-LABEL: ptr1_i32_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -451,8 +273,8 @@ define amdgpu_kernel void @ptr1_i32_preload_arg(ptr addrspace(1) %out, i32 %arg0 } -define amdgpu_kernel void @i32_ptr1_i32_preload_arg(i32 %arg0, ptr addrspace(1) %out, i32 %arg1) { -; GFX940-NO-PRELOAD-LABEL: i32_ptr1_i32_preload_arg: +define amdgpu_kernel void @i32_ptr1_i32_kernel_preload_arg(i32 %arg0, ptr addrspace(1) %out, i32 %arg1) { +; GFX940-NO-PRELOAD-LABEL: i32_ptr1_i32_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x10 ; GFX940-NO-PRELOAD-NEXT: s_load_dword s5, s[0:1], 0x0 @@ -464,20 +286,7 @@ define amdgpu_kernel void @i32_ptr1_i32_preload_arg(i32 %arg0, ptr addrspace(1) ; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: i32_ptr1_i32_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dword s3, s[0:1], 0x10 -; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: s_add_i32 s0, s2, s3 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: i32_ptr1_i32_preload_arg: +; GFX940-PRELOAD-2-LABEL: i32_ptr1_i32_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -489,17 +298,7 @@ define amdgpu_kernel void @i32_ptr1_i32_preload_arg(i32 %arg0, ptr addrspace(1) ; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: i32_ptr1_i32_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: s_add_i32 s0, s2, s6 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: i32_ptr1_i32_preload_arg: +; GFX940-PRELOAD-8-LABEL: i32_ptr1_i32_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -509,7 +308,7 @@ define amdgpu_kernel void @i32_ptr1_i32_preload_arg(i32 %arg0, ptr addrspace(1) ; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[4:5] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: i32_ptr1_i32_preload_arg: +; GFX90a-NO-PRELOAD-LABEL: i32_ptr1_i32_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x10 ; GFX90a-NO-PRELOAD-NEXT: s_load_dword s3, s[4:5], 0x0 @@ -521,20 +320,7 @@ define amdgpu_kernel void @i32_ptr1_i32_preload_arg(i32 %arg0, ptr addrspace(1) ; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: i32_ptr1_i32_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. 
Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dword s2, s[4:5], 0x10 -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: s_add_i32 s2, s6, s2 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s2 -; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[0:1] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: i32_ptr1_i32_preload_arg: +; GFX90a-PRELOAD-2-LABEL: i32_ptr1_i32_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -546,17 +332,7 @@ define amdgpu_kernel void @i32_ptr1_i32_preload_arg(i32 %arg0, ptr addrspace(1) ; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[8:9] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: i32_ptr1_i32_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: s_add_i32 s0, s6, s10 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[8:9] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: i32_ptr1_i32_preload_arg: +; GFX90a-PRELOAD-8-LABEL: i32_ptr1_i32_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -570,8 +346,8 @@ define amdgpu_kernel void @i32_ptr1_i32_preload_arg(i32 %arg0, ptr addrspace(1) ret void } -define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) %out, i16 %arg0, i16 %arg1) { -; GFX940-NO-PRELOAD-LABEL: ptr1_i16_i16_preload_arg: +define amdgpu_kernel void @ptr1_i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %arg0, i16 %arg1) { +; GFX940-NO-PRELOAD-LABEL: ptr1_i16_i16_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 @@ -584,21 +360,7 @@ define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) %out, i16 % ; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: ptr1_i16_i16_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: s_lshr_b32 s1, s0, 16 -; GFX940-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff -; GFX940-PRELOAD-1-NEXT: s_add_i32 s0, s0, s1 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: ptr1_i16_i16_preload_arg: +; GFX940-PRELOAD-2-LABEL: ptr1_i16_i16_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -612,19 +374,7 @@ define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) %out, i16 % ; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: ptr1_i16_i16_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-PRELOAD-4-NEXT: s_and_b32 s1, s4, 0xffff -; GFX940-PRELOAD-4-NEXT: s_add_i32 s0, s1, s0 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: ptr1_i16_i16_preload_arg: +; GFX940-PRELOAD-8-LABEL: ptr1_i16_i16_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -636,7 +386,7 @@ define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) %out, i16 % ; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: ptr1_i16_i16_preload_arg: +; GFX90a-NO-PRELOAD-LABEL: ptr1_i16_i16_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -649,21 +399,7 @@ define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) %out, i16 % ; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: ptr1_i16_i16_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: s_lshr_b32 s1, s0, 16 -; GFX90a-PRELOAD-1-NEXT: s_and_b32 s0, s0, 0xffff -; GFX90a-PRELOAD-1-NEXT: s_add_i32 s0, s0, s1 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: ptr1_i16_i16_preload_arg: +; GFX90a-PRELOAD-2-LABEL: ptr1_i16_i16_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -677,19 +413,7 @@ define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) %out, i16 % ; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: ptr1_i16_i16_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 16 -; GFX90a-PRELOAD-4-NEXT: s_and_b32 s1, s8, 0xffff -; GFX90a-PRELOAD-4-NEXT: s_add_i32 s0, s1, s0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: ptr1_i16_i16_preload_arg: +; GFX90a-PRELOAD-8-LABEL: ptr1_i16_i16_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -707,8 +431,8 @@ define amdgpu_kernel void @ptr1_i16_i16_preload_arg(ptr addrspace(1) %out, i16 % ret void } -define amdgpu_kernel void @ptr1_v2i8_preload_arg(ptr addrspace(1) %out, <2 x i8> %in) { -; GFX940-NO-PRELOAD-LABEL: ptr1_v2i8_preload_arg: +define amdgpu_kernel void @ptr1_v2i8_kernel_preload_arg(ptr addrspace(1) %out, <2 x i8> %in) { +; GFX940-NO-PRELOAD-LABEL: ptr1_v2i8_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 @@ -718,18 +442,7 @@ define amdgpu_kernel void @ptr1_v2i8_preload_arg(ptr addrspace(1) %out, <2 x i8> ; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: ptr1_v2i8_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dword s0, s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-1-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: ptr1_v2i8_preload_arg: +; GFX940-PRELOAD-2-LABEL: ptr1_v2i8_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -740,18 +453,7 @@ define amdgpu_kernel void @ptr1_v2i8_preload_arg(ptr addrspace(1) %out, <2 x i8> ; GFX940-PRELOAD-2-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: ptr1_v2i8_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8 -; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-PRELOAD-4-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: ptr1_v2i8_preload_arg: +; GFX940-PRELOAD-8-LABEL: ptr1_v2i8_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -762,7 +464,7 @@ define amdgpu_kernel void @ptr1_v2i8_preload_arg(ptr addrspace(1) %out, <2 x i8> ; GFX940-PRELOAD-8-NEXT: global_store_short v1, v0, s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: ptr1_v2i8_preload_arg: +; GFX90a-NO-PRELOAD-LABEL: ptr1_v2i8_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -772,18 +474,7 @@ define amdgpu_kernel void @ptr1_v2i8_preload_arg(ptr addrspace(1) %out, <2 x i8> ; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: ptr1_v2i8_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dword s0, s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-1-NEXT: global_store_short v0, v1, s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: ptr1_v2i8_preload_arg: +; GFX90a-PRELOAD-2-LABEL: ptr1_v2i8_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -794,18 +485,7 @@ define amdgpu_kernel void @ptr1_v2i8_preload_arg(ptr addrspace(1) %out, <2 x i8> ; GFX90a-PRELOAD-2-NEXT: global_store_short v1, v0, s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: ptr1_v2i8_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. 
Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 8 -; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0 -; GFX90a-PRELOAD-4-NEXT: global_store_short v1, v0, s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: ptr1_v2i8_preload_arg: +; GFX90a-PRELOAD-8-LABEL: ptr1_v2i8_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -820,8 +500,8 @@ define amdgpu_kernel void @ptr1_v2i8_preload_arg(ptr addrspace(1) %out, <2 x i8> } -define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) { -; GFX940-NO-PRELOAD-LABEL: byref_preload_arg: +define amdgpu_kernel void @byref_kernel_preload_arg(ptr addrspace(1) %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) { +; GFX940-NO-PRELOAD-LABEL: byref_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x100 ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 @@ -835,22 +515,7 @@ define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) %out, ptr addrspac ; GFX940-NO-PRELOAD-NEXT: s_waitcnt vmcnt(0) ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: byref_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_waitcnt vmcnt(0) -; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_waitcnt vmcnt(0) -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: byref_preload_arg: +; GFX940-PRELOAD-2-LABEL: byref_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -865,22 +530,7 @@ define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) %out, ptr addrspac ; GFX940-PRELOAD-2-NEXT: s_waitcnt vmcnt(0) ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: byref_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x100 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s1 -; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_waitcnt vmcnt(0) -; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_waitcnt vmcnt(0) -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: byref_preload_arg: +; GFX940-PRELOAD-8-LABEL: byref_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -895,7 +545,7 @@ define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) %out, ptr addrspac ; GFX940-PRELOAD-8-NEXT: s_waitcnt vmcnt(0) ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: byref_preload_arg: +; GFX90a-NO-PRELOAD-LABEL: byref_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100 ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0 @@ -909,22 +559,7 @@ define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) %out, ptr addrspac ; GFX90a-NO-PRELOAD-NEXT: s_waitcnt vmcnt(0) ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: byref_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s1 -; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_waitcnt vmcnt(0) -; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v2, s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_waitcnt vmcnt(0) -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: byref_preload_arg: +; GFX90a-PRELOAD-2-LABEL: byref_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -939,22 +574,7 @@ define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) %out, ptr addrspac ; GFX90a-PRELOAD-2-NEXT: s_waitcnt vmcnt(0) ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: byref_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x100 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s1 -; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_waitcnt vmcnt(0) -; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v2, s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_waitcnt vmcnt(0) -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: byref_preload_arg: +; GFX90a-PRELOAD-8-LABEL: byref_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -975,8 +595,8 @@ define amdgpu_kernel void @byref_preload_arg(ptr addrspace(1) %out, ptr addrspac } -define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) nounwind { -; GFX940-NO-PRELOAD-LABEL: v8i32_arg: +define amdgpu_kernel void @v8i32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) nounwind { +; GFX940-NO-PRELOAD-LABEL: v8i32_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 ; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0 @@ -995,27 +615,7 @@ define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> ; GFX940-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: v8i32_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11 -; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_nop 1 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s7 -; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: v8i32_arg: +; GFX940-PRELOAD-2-LABEL: v8i32_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -1035,27 +635,7 @@ define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> ; GFX940-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: v8i32_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x20 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11 -; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_nop 1 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s6 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s7 -; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: v8i32_arg: +; GFX940-PRELOAD-8-LABEL: v8i32_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -1075,7 +655,7 @@ define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> ; GFX940-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: v8i32_arg: +; GFX90a-NO-PRELOAD-LABEL: v8i32_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1094,27 +674,7 @@ define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> ; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: v8i32_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. 
Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s12 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s13 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s14 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s15 -; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 -; GFX90a-PRELOAD-1-NEXT: s_nop 0 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11 -; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: v8i32_arg: +; GFX90a-PRELOAD-2-LABEL: v8i32_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -1134,27 +694,7 @@ define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> ; GFX90a-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: v8i32_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x20 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0 -; GFX90a-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s12 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s13 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s14 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s15 -; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 -; GFX90a-PRELOAD-4-NEXT: s_nop 0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11 -; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: v8i32_arg: +; GFX90a-PRELOAD-8-LABEL: v8i32_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -1177,8 +717,8 @@ define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> ret void } -define amdgpu_kernel void @v3i16_preload_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) nounwind { -; GFX940-NO-PRELOAD-LABEL: v3i16_preload_arg: +define amdgpu_kernel void @v3i16_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) nounwind { +; GFX940-NO-PRELOAD-LABEL: v3i16_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 @@ -1189,20 +729,7 @@ define amdgpu_kernel void @v3i16_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: v3i16_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-PRELOAD-1-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 -; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: v3i16_preload_arg: +; GFX940-PRELOAD-2-LABEL: v3i16_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -1213,18 +740,7 @@ define amdgpu_kernel void @v3i16_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: v3i16_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-PRELOAD-4-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-PRELOAD-4-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: v3i16_preload_arg: +; GFX940-PRELOAD-8-LABEL: v3i16_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -1235,7 +751,7 @@ define amdgpu_kernel void @v3i16_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: v3i16_preload_arg: +; GFX90a-NO-PRELOAD-LABEL: v3i16_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 @@ -1246,20 +762,7 @@ define amdgpu_kernel void @v3i16_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: v3i16_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. 
Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0 -; GFX90a-PRELOAD-1-NEXT: global_store_short v0, v1, s[6:7] offset:4 -; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v2, s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: v3i16_preload_arg: +; GFX90a-PRELOAD-2-LABEL: v3i16_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -1270,18 +773,7 @@ define amdgpu_kernel void @v3i16_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: v3i16_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 -; GFX90a-PRELOAD-4-NEXT: global_store_short v0, v1, s[6:7] offset:4 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s8 -; GFX90a-PRELOAD-4-NEXT: global_store_dword v0, v1, s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: v3i16_preload_arg: +; GFX90a-PRELOAD-8-LABEL: v3i16_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -1295,8 +787,8 @@ define amdgpu_kernel void @v3i16_preload_arg(ptr addrspace(1) nocapture %out, <3 ret void } -define amdgpu_kernel void @v3i32_preload_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) nounwind { -; GFX940-NO-PRELOAD-LABEL: v3i32_preload_arg: +define amdgpu_kernel void @v3i32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) nounwind { +; GFX940-NO-PRELOAD-LABEL: v3i32_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 @@ -1308,20 +800,7 @@ define amdgpu_kernel void @v3i32_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX940-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: v3i32_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 -; GFX940-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: v3i32_preload_arg: +; GFX940-PRELOAD-2-LABEL: v3i32_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -1332,18 +811,7 @@ define amdgpu_kernel void @v3i32_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX940-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: v3i32_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s7 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: v3i32_preload_arg: +; GFX940-PRELOAD-8-LABEL: v3i32_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -1354,7 +822,7 @@ define amdgpu_kernel void @v3i32_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX940-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: v3i32_preload_arg: +; GFX90a-NO-PRELOAD-LABEL: v3i32_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 @@ -1366,20 +834,7 @@ define amdgpu_kernel void @v3i32_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: v3i32_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. 
Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s0 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s2 -; GFX90a-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: v3i32_preload_arg: +; GFX90a-PRELOAD-2-LABEL: v3i32_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -1390,18 +845,7 @@ define amdgpu_kernel void @v3i32_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX90a-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: v3i32_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s10 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s11 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s12 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0 -; GFX90a-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: v3i32_preload_arg: +; GFX90a-PRELOAD-8-LABEL: v3i32_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -1415,8 +859,8 @@ define amdgpu_kernel void @v3i32_preload_arg(ptr addrspace(1) nocapture %out, <3 ret void } -define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) nounwind { -; GFX940-NO-PRELOAD-LABEL: v3f32_preload_arg: +define amdgpu_kernel void @v3f32_kernel_preload_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) nounwind { +; GFX940-NO-PRELOAD-LABEL: v3f32_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 @@ -1428,20 +872,7 @@ define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX940-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: v3f32_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 -; GFX940-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: v3f32_preload_arg: +; GFX940-PRELOAD-2-LABEL: v3f32_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -1452,18 +883,7 @@ define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX940-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: v3f32_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s6 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s7 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s8 -; GFX940-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: v3f32_preload_arg: +; GFX940-PRELOAD-8-LABEL: v3f32_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -1474,7 +894,7 @@ define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX940-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: v3f32_preload_arg: +; GFX90a-NO-PRELOAD-LABEL: v3f32_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 @@ -1486,20 +906,7 @@ define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: v3f32_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. 
Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s0 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s2 -; GFX90a-PRELOAD-1-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: v3f32_preload_arg: +; GFX90a-PRELOAD-2-LABEL: v3f32_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -1510,18 +917,7 @@ define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture %out, <3 ; GFX90a-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: v3f32_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, 0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s10 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s11 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s12 -; GFX90a-PRELOAD-4-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: v3f32_preload_arg: +; GFX90a-PRELOAD-8-LABEL: v3f32_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -1535,8 +931,8 @@ define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture %out, <3 ret void } -define amdgpu_kernel void @v5i8_preload_arg(ptr addrspace(1) nocapture %out, <5 x i8> %in) nounwind { -; GFX940-NO-PRELOAD-LABEL: v5i8_preload_arg: +define amdgpu_kernel void @v5i8_kernel_preload_arg(ptr addrspace(1) nocapture %out, <5 x i8> %in) nounwind { +; GFX940-NO-PRELOAD-LABEL: v5i8_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 @@ -1547,20 +943,7 @@ define amdgpu_kernel void @v5i8_preload_arg(ptr addrspace(1) nocapture %out, <5 ; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: v5i8_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0 -; GFX940-PRELOAD-1-NEXT: global_store_byte v0, v1, s[2:3] offset:4 sc0 sc1 -; GFX940-PRELOAD-1-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: v5i8_preload_arg: +; GFX940-PRELOAD-2-LABEL: v5i8_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -1578,25 +961,7 @@ define amdgpu_kernel void @v5i8_preload_arg(ptr addrspace(1) nocapture %out, <5 ; GFX940-PRELOAD-2-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: v5i8_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8 -; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 24 -; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0 -; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s5 -; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0 -; GFX940-PRELOAD-4-NEXT: global_store_byte v1, v2, s[2:3] offset:4 sc0 sc1 -; GFX940-PRELOAD-4-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: v5i8_preload_arg: +; GFX940-PRELOAD-8-LABEL: v5i8_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -1614,7 +979,7 @@ define amdgpu_kernel void @v5i8_preload_arg(ptr addrspace(1) nocapture %out, <5 ; GFX940-PRELOAD-8-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: v5i8_preload_arg: +; GFX90a-NO-PRELOAD-LABEL: v5i8_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 @@ -1625,20 +990,7 @@ define amdgpu_kernel void @v5i8_preload_arg(ptr addrspace(1) nocapture %out, <5 ; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: v5i8_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s1 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s0 -; GFX90a-PRELOAD-1-NEXT: global_store_byte v0, v1, s[6:7] offset:4 -; GFX90a-PRELOAD-1-NEXT: global_store_dword v0, v2, s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: v5i8_preload_arg: +; GFX90a-PRELOAD-2-LABEL: v5i8_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -1656,25 +1008,7 @@ define amdgpu_kernel void @v5i8_preload_arg(ptr addrspace(1) nocapture %out, <5 ; GFX90a-PRELOAD-2-NEXT: global_store_dword v1, v0, s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: v5i8_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 8 -; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 24 -; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0 -; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 16 -; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, 0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s9 -; GFX90a-PRELOAD-4-NEXT: global_store_byte v1, v2, s[6:7] offset:4 -; GFX90a-PRELOAD-4-NEXT: global_store_dword v1, v0, s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: v5i8_preload_arg: +; GFX90a-PRELOAD-8-LABEL: v5i8_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -1695,8 +1029,8 @@ define amdgpu_kernel void @v5i8_preload_arg(ptr addrspace(1) nocapture %out, <5 ret void } -define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture %out, <5 x double> %in) nounwind { -; GFX940-NO-PRELOAD-LABEL: v5f64_arg: +define amdgpu_kernel void @v5f64_kernel_preload_arg(ptr addrspace(1) nocapture %out, <5 x double> %in) nounwind { +; GFX940-NO-PRELOAD-LABEL: v5f64_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x60 ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 @@ -1718,30 +1052,7 @@ define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture %out, <5 x doubl ; GFX940-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[12:13] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: v5f64_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 -; GFX940-PRELOAD-1-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[2:3], s[12:13] -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8 -; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11 -; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_nop 1 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s6 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s7 -; GFX940-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: v5f64_arg: +; GFX940-PRELOAD-2-LABEL: v5f64_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -1764,30 +1075,7 @@ define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture %out, <5 x doubl ; GFX940-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: v5f64_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x60 -; GFX940-PRELOAD-4-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x40 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0 -; GFX940-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-4-NEXT: v_mov_b64_e32 v[2:3], s[12:13] -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8 -; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v4, v[2:3], s[2:3] offset:32 sc0 sc1 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11 -; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] offset:16 sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_nop 1 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s4 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s5 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s6 -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s7 -; GFX940-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; GFX940-PRELOAD-8-LABEL: v5f64_arg: +; GFX940-PRELOAD-8-LABEL: v5f64_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -1810,7 +1098,7 @@ define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture %out, <5 x doubl ; GFX940-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: v5f64_arg: +; GFX90a-NO-PRELOAD-LABEL: v5f64_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60 ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40 @@ -1832,30 +1120,7 @@ define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture %out, <5 x doubl ; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: v5f64_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60 -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v4, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s12 -; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] offset:32 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s13 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s14 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s15 -; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 -; GFX90a-PRELOAD-1-NEXT: s_nop 0 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v0, s8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v1, s9 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, s10 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v3, s11 -; GFX90a-PRELOAD-1-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; 
GFX90a-PRELOAD-2-LABEL: v5f64_arg: +; GFX90a-PRELOAD-2-LABEL: v5f64_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -1878,30 +1143,7 @@ define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture %out, <5 x doubl ; GFX90a-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: v5f64_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x60 -; GFX90a-PRELOAD-4-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v4, 0 -; GFX90a-PRELOAD-4-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-4-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s12 -; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] offset:32 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s13 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s14 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s15 -; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] offset:16 -; GFX90a-PRELOAD-4-NEXT: s_nop 0 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v0, s8 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v1, s9 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, s10 -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v3, s11 -; GFX90a-PRELOAD-4-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: v5f64_arg: +; GFX90a-PRELOAD-8-LABEL: v5f64_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -1927,8 +1169,8 @@ define amdgpu_kernel void @v5f64_arg(ptr addrspace(1) nocapture %out, <5 x doubl ret void } -define amdgpu_kernel void @v8i8_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) { -; GFX940-NO-PRELOAD-LABEL: v8i8_preload_arg: +define amdgpu_kernel void @v8i8_kernel_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) { +; GFX940-NO-PRELOAD-LABEL: v8i8_kernel_preload_arg: ; GFX940-NO-PRELOAD: ; %bb.0: ; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0 @@ -1937,18 +1179,7 @@ define amdgpu_kernel void @v8i8_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) ; GFX940-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: v8i8_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; -; GFX940-PRELOAD-2-LABEL: v8i8_preload_arg: +; GFX940-PRELOAD-2-LABEL: v8i8_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-2-NEXT: ; %bb.0: @@ -1973,32 +1204,7 @@ define amdgpu_kernel void @v8i8_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) ; GFX940-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: v8i8_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 8 -; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 24 -; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0 -; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s5, 16 -; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 8 -; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 24 -; GFX940-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v2, 8, s0 -; GFX940-PRELOAD-4-NEXT: s_lshr_b32 s0, s4, 16 -; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX940-PRELOAD-4-NEXT: s_nop 0 -; GFX940-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; -; 
GFX940-PRELOAD-8-LABEL: v8i8_preload_arg: +; GFX940-PRELOAD-8-LABEL: v8i8_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX940-PRELOAD-8-NEXT: ; %bb.0: @@ -2023,7 +1229,7 @@ define amdgpu_kernel void @v8i8_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) ; GFX940-PRELOAD-8-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 ; GFX940-PRELOAD-8-NEXT: s_endpgm ; -; GFX90a-NO-PRELOAD-LABEL: v8i8_preload_arg: +; GFX90a-NO-PRELOAD-LABEL: v8i8_kernel_preload_arg: ; GFX90a-NO-PRELOAD: ; %bb.0: ; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, 0 @@ -2032,18 +1238,7 @@ define amdgpu_kernel void @v8i8_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) ; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: v8i8_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] -; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; -; GFX90a-PRELOAD-2-LABEL: v8i8_preload_arg: +; GFX90a-PRELOAD-2-LABEL: v8i8_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-2-NEXT: ; %bb.0: @@ -2067,31 +1262,7 @@ define amdgpu_kernel void @v8i8_preload_arg(ptr addrspace(1) %out, <8 x i8> %in) ; GFX90a-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: v8i8_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s9, 8 -; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s9, 24 -; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v1, 8, s0 -; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s9, 16 -; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 8 -; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v0, 8, s0 -; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 24 -; GFX90a-PRELOAD-4-NEXT: v_lshlrev_b16_e64 v2, 8, s0 -; GFX90a-PRELOAD-4-NEXT: s_lshr_b32 s0, s8, 16 -; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v2, s0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX90a-PRELOAD-4-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 -; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; -; GFX90a-PRELOAD-8-LABEL: v8i8_preload_arg: +; 
GFX90a-PRELOAD-8-LABEL: v8i8_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 ; GFX90a-PRELOAD-8-NEXT: ; %bb.0: @@ -2129,17 +1300,6 @@ define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a) ; GFX940-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: i64_kernel_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; ; GFX940-PRELOAD-2-LABEL: i64_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 @@ -2149,15 +1309,6 @@ define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a) ; GFX940-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: i64_kernel_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
-; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-PRELOAD-4-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; ; GFX940-PRELOAD-8-LABEL: i64_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 @@ -2177,17 +1328,6 @@ define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a) ; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: i64_kernel_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] -; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; ; GFX90a-PRELOAD-2-LABEL: i64_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 @@ -2197,15 +1337,6 @@ define amdgpu_kernel void @i64_kernel_preload_arg(ptr addrspace(1) %out, i64 %a) ; GFX90a-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: i64_kernel_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. 
Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 -; GFX90a-PRELOAD-4-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1] -; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; ; GFX90a-PRELOAD-8-LABEL: i64_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 @@ -2229,17 +1360,6 @@ define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double ; GFX940-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 ; GFX940-NO-PRELOAD-NEXT: s_endpgm ; -; GFX940-PRELOAD-1-LABEL: f64_kernel_preload_arg: -; GFX940-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-1-NEXT: ; %bb.0: -; GFX940-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x8 -; GFX940-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-PRELOAD-1-NEXT: v_mov_b64_e32 v[0:1], s[0:1] -; GFX940-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; GFX940-PRELOAD-1-NEXT: s_endpgm -; ; GFX940-PRELOAD-2-LABEL: f64_kernel_preload_arg: ; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 @@ -2249,15 +1369,6 @@ define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double ; GFX940-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 ; GFX940-PRELOAD-2-NEXT: s_endpgm ; -; GFX940-PRELOAD-4-LABEL: f64_kernel_preload_arg: -; GFX940-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX940-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX940-PRELOAD-4-NEXT: ; %bb.0: -; GFX940-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 -; GFX940-PRELOAD-4-NEXT: v_mov_b64_e32 v[0:1], s[4:5] -; GFX940-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1 -; GFX940-PRELOAD-4-NEXT: s_endpgm -; ; GFX940-PRELOAD-8-LABEL: f64_kernel_preload_arg: ; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 @@ -2277,17 +1388,6 @@ define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double ; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX90a-NO-PRELOAD-NEXT: s_endpgm ; -; GFX90a-PRELOAD-1-LABEL: f64_kernel_preload_arg: -; GFX90a-PRELOAD-1: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-1-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-1-NEXT: ; %bb.0: -; GFX90a-PRELOAD-1-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 -; GFX90a-PRELOAD-1-NEXT: v_mov_b32_e32 v2, 0 -; GFX90a-PRELOAD-1-NEXT: s_waitcnt lgkmcnt(0) -; GFX90a-PRELOAD-1-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] -; GFX90a-PRELOAD-1-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] -; GFX90a-PRELOAD-1-NEXT: s_endpgm -; ; GFX90a-PRELOAD-2-LABEL: f64_kernel_preload_arg: ; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. 
Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 @@ -2297,15 +1397,6 @@ define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double ; GFX90a-PRELOAD-2-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] ; GFX90a-PRELOAD-2-NEXT: s_endpgm ; -; GFX90a-PRELOAD-4-LABEL: f64_kernel_preload_arg: -; GFX90a-PRELOAD-4: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. -; GFX90a-PRELOAD-4-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 -; GFX90a-PRELOAD-4-NEXT: ; %bb.0: -; GFX90a-PRELOAD-4-NEXT: v_mov_b32_e32 v2, 0 -; GFX90a-PRELOAD-4-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1] -; GFX90a-PRELOAD-4-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] -; GFX90a-PRELOAD-4-NEXT: s_endpgm -; ; GFX90a-PRELOAD-8-LABEL: f64_kernel_preload_arg: ; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. ; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 @@ -2317,3 +1408,1115 @@ define amdgpu_kernel void @f64_kernel_preload_arg(ptr addrspace(1) %out, double store double %in, ptr addrspace(1) %out ret void } + +define amdgpu_kernel void @half_kernel_preload_arg(ptr addrspace(1) %out, half %in) { +; GFX940-NO-PRELOAD-LABEL: half_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: half_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-2-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: half_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-8-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: half_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: half_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-2-NEXT: global_store_short v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: half_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-8-NEXT: global_store_short v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store half %in, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @bfloat_kernel_preload_arg(ptr addrspace(1) %out, bfloat %in) { +; GFX940-NO-PRELOAD-LABEL: bfloat_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: bfloat_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-2-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: bfloat_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-8-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: bfloat_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: bfloat_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-2-NEXT: global_store_short v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: bfloat_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-8-NEXT: global_store_short v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store bfloat %in, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v2bfloat_kernel_preload_arg(ptr addrspace(1) %out, <2 x bfloat> %in) { +; GFX940-NO-PRELOAD-LABEL: v2bfloat_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v2bfloat_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v2bfloat_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v2bfloat_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v2bfloat_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v2bfloat_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store <2 x bfloat> %in, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v3bfloat_kernel_preload_arg(ptr addrspace(1) %out, <3 x bfloat> %in) { +; GFX940-NO-PRELOAD-LABEL: v3bfloat_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v3bfloat_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-2-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v3bfloat_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-8-NEXT: global_store_short v0, v1, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v3bfloat_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v3bfloat_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-2-NEXT: global_store_short v0, v1, s[6:7] offset:4 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v3bfloat_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-8-NEXT: global_store_short v0, v1, s[6:7] offset:4 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store <3 x bfloat> %in, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v6bfloat_kernel_preload_arg(ptr addrspace(1) %out, <6 x bfloat> %in) { +; GFX940-NO-PRELOAD-LABEL: v6bfloat_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v6bfloat_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v6bfloat_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v6bfloat_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s1 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v6bfloat_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v6bfloat_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store <6 x bfloat> %in, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @half_v7bfloat_kernel_preload_arg(ptr addrspace(1) %out, half %in, <7 x bfloat> %in2, ptr addrspace(1) %out2) { +; GFX940-NO-PRELOAD-LABEL: half_v7bfloat_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s10, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x20 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s10 +; GFX940-NO-PRELOAD-NEXT: global_store_short v3, v0, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s7 +; GFX940-NO-PRELOAD-NEXT: global_store_short v3, v0, s[8:9] offset:12 sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: half_v7bfloat_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x10 +; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x20 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-2-NEXT: global_store_short v3, v0, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s11 +; GFX940-PRELOAD-2-NEXT: global_store_short v3, v0, s[6:7] offset:12 sc0 sc1 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: half_v7bfloat_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-PRELOAD-8-NEXT: global_store_short v3, v0, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s9 +; GFX940-PRELOAD-8-NEXT: global_store_short v3, v0, s[10:11] offset:12 sc0 sc1 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[10:11] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: half_v7bfloat_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s10, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x20 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v3, v0, s[6:7] +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s3 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v3, v0, s[8:9] offset:12 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s1 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: half_v7bfloat_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 +; GFX90a-PRELOAD-2-NEXT: s_load_dwordx2 s[10:11], s[4:5], 0x20 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-PRELOAD-2-NEXT: global_store_short v3, v0, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s3 +; GFX90a-PRELOAD-2-NEXT: global_store_short v3, v0, s[10:11] offset:12 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s1 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[10:11] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: half_v7bfloat_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x20 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s8 +; GFX90a-PRELOAD-8-NEXT: global_store_short v3, v0, s[6:7] +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s13 +; GFX90a-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-8-NEXT: global_store_short v3, v0, s[0:1] offset:12 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store half %in, ptr addrspace(1) %out + store <7 x bfloat> %in2, ptr addrspace(1) %out2 + ret void +} + +define amdgpu_kernel void @i1_kernel_preload_arg(ptr addrspace(1) %out, i1 %in) { +; GFX940-NO-PRELOAD-LABEL: i1_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; 
GFX940-NO-PRELOAD-NEXT: s_load_dword s4, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: s_and_b32 s0, s4, 1 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-NO-PRELOAD-NEXT: global_store_byte v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: i1_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_and_b32 s0, s4, 1 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-2-NEXT: global_store_byte v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: i1_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_and_b32 s0, s4, 1 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX940-PRELOAD-8-NEXT: global_store_byte v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: i1_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: s_and_b32 s2, s2, 1 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_byte v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: i1_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_and_b32 s0, s8, 1 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-2-NEXT: global_store_byte v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: i1_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_and_b32 s0, s8, 1 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s0 +; GFX90a-PRELOAD-8-NEXT: global_store_byte v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store i1 %in, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) %out, fp128 %in) { +; GFX940-NO-PRELOAD-LABEL: fp128_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX940-NO-PRELOAD-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: fp128_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s9 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: fp128_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s9 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: fp128_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] +; GFX90a-NO-PRELOAD-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: fp128_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s13 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: fp128_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v4, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s13 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store fp128 %in, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v7i8_kernel_preload_arg(ptr addrspace(1) %out, <7 x i8> %in) { +; GFX940-NO-PRELOAD-LABEL: v7i8_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX940-NO-PRELOAD-NEXT: global_store_byte_d16_hi v0, v1, s[0:1] offset:6 sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] offset:4 sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v7i8_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 8 +; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 24 +; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: s_lshr_b32 s0, s5, 8 +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s5 +; GFX940-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-2-NEXT: global_store_byte_d16_hi v2, v3, s[2:3] offset:6 sc0 sc1 +; GFX940-PRELOAD-2-NEXT: global_store_short v2, v1, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-2-NEXT: global_store_dword v2, v0, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v7i8_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 8 +; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 24 +; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s5, 8 +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s5 +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: global_store_byte_d16_hi v2, v3, s[2:3] offset:6 sc0 sc1 +; GFX940-PRELOAD-8-NEXT: global_store_short v2, v1, s[2:3] offset:4 sc0 sc1 +; GFX940-PRELOAD-8-NEXT: global_store_dword v2, v0, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v7i8_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_byte_d16_hi v0, v1, s[0:1] offset:6 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] offset:4 +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v7i8_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. 
Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 8 +; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 24 +; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s8, 16 +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: s_lshr_b32 s0, s9, 8 +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, s9 +; GFX90a-PRELOAD-2-NEXT: v_or_b32_sdwa v1, s9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-2-NEXT: global_store_byte_d16_hi v2, v3, s[6:7] offset:6 +; GFX90a-PRELOAD-2-NEXT: global_store_short v2, v1, s[6:7] offset:4 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v2, v0, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v7i8_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 8 +; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 24 +; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 16 +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s9, 8 +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v1, 8, s0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, s9 +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v1, s9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: global_store_byte_d16_hi v2, v3, s[6:7] offset:6 +; GFX90a-PRELOAD-8-NEXT: global_store_short v2, v1, s[6:7] offset:4 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v2, v0, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store <7 x i8> %in, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v7half_kernel_preload_arg(ptr addrspace(1) %out, <7 x half> %in) { +; GFX940-NO-PRELOAD-LABEL: v7half_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-NO-PRELOAD-NEXT: global_store_short v3, v1, s[2:3] offset:12 sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 +; 
GFX940-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: v7half_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s9 +; GFX940-PRELOAD-2-NEXT: global_store_short v3, v0, s[2:3] offset:12 sc0 sc1 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: v7half_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s9 +; GFX940-PRELOAD-8-NEXT: global_store_short v3, v0, s[2:3] offset:12 sc0 sc1 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: v7half_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s3 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s0 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v3, v1, s[6:7] offset:12 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s1 +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: v7half_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s13 +; GFX90a-PRELOAD-2-NEXT: global_store_short v3, v0, s[6:7] offset:12 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: v7half_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s13 +; GFX90a-PRELOAD-8-NEXT: global_store_short v3, v0, s[6:7] offset:12 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store <7 x half> %in, ptr addrspace(1) %out + ret void +} + +; Test when previous argument was not dword aligned. 
+define amdgpu_kernel void @i16_i32_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, i32 %in2, ptr addrspace(1) %out2) { +; GFX940-NO-PRELOAD-LABEL: i16_i32_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x10 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s6 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s7 +; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[4:5] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: i16_i32_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_load_dword s5, s[0:1], 0xc +; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x10 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-2-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-2-NEXT: global_store_dword v0, v1, s[6:7] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: i16_i32_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-8-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-8-NEXT: global_store_dword v0, v1, s[6:7] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: i16_i32_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s3 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: global_store_dword v0, v2, s[6:7] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: i16_i32_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_load_dword s2, s[4:5], 0xc +; GFX90a-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-2-NEXT: global_store_short v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-PRELOAD-2-NEXT: global_store_dword v0, v1, s[0:1] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: i16_i32_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-8-NEXT: global_store_short v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s9 +; GFX90a-PRELOAD-8-NEXT: global_store_dword v0, v1, s[10:11] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store i16 %in, ptr addrspace(1) %out + store i32 %in2, ptr addrspace(1) %out2 + ret void +} + +define amdgpu_kernel void @i16_v3i32_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, <3 x i32> %in2, ptr addrspace(1) %out2) { +; GFX940-NO-PRELOAD-LABEL: i16_v3i32_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: s_load_dword s7, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x20 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, s7 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s4 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s6 +; GFX940-NO-PRELOAD-NEXT: global_store_short v3, v4, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: i16_v3i32_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x10 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x20 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v4, s4 +; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s8 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s9 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s10 +; GFX940-PRELOAD-2-NEXT: global_store_short v3, v4, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: i16_v3i32_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v4, s4 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s6 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s7 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8 +; GFX940-PRELOAD-8-NEXT: global_store_short v3, v4, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[10:11] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: i16_v3i32_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s3, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x20 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v4, s3 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, s0 +; GFX90a-NO-PRELOAD-NEXT: 
v_mov_b32_e32 v1, s1 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v3, v4, s[6:7] +; GFX90a-NO-PRELOAD-NEXT: global_store_dwordx3 v3, v[0:2], s[8:9] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: i16_v3i32_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-2-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x20 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v4, s8 +; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, s0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s1 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v2, s2 +; GFX90a-PRELOAD-2-NEXT: global_store_short v3, v4, s[6:7] +; GFX90a-PRELOAD-2-NEXT: global_store_dwordx3 v3, v[0:2], s[4:5] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: i16_v3i32_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x20 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v3, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v4, s8 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, s10 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s11 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s12 +; GFX90a-PRELOAD-8-NEXT: global_store_short v3, v4, s[6:7] +; GFX90a-PRELOAD-8-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-8-NEXT: global_store_dwordx3 v3, v[0:2], s[0:1] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store i16 %in, ptr addrspace(1) %out + store <3 x i32> %in2, ptr addrspace(1) %out2 + ret void +} + +define amdgpu_kernel void @i16_i16_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, i16 %in2, ptr addrspace(1) %out2) { +; GFX940-NO-PRELOAD-LABEL: i16_i16_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s6, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x10 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s6 +; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: global_store_short_d16_hi v0, v1, s[4:5] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: i16_i16_kernel_preload_arg: +; GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_load_dword s5, s[0:1], 0x8 +; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x10 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-2-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-2-NEXT: global_store_short_d16_hi v0, v1, s[6:7] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: i16_i16_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-8-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: global_store_short_d16_hi v0, v1, s[6:7] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: i16_i16_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s6, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s6 +; GFX90a-NO-PRELOAD-NEXT: global_store_short v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: global_store_short_d16_hi v0, v1, s[2:3] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: i16_i16_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. 
+; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-2-NEXT: global_store_short v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-PRELOAD-2-NEXT: global_store_short_d16_hi v0, v1, s[0:1] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: i16_i16_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-8-NEXT: global_store_short v0, v1, s[6:7] +; GFX90a-PRELOAD-8-NEXT: global_store_short_d16_hi v0, v1, s[10:11] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store i16 %in, ptr addrspace(1) %out + store i16 %in2, ptr addrspace(1) %out2 + ret void +} + +define amdgpu_kernel void @i16_v2i8_kernel_preload_arg(ptr addrspace(1) %out, i16 %in, <2 x i8> %in2, ptr addrspace(1) %out2) { +; GFX940-NO-PRELOAD-LABEL: i16_v2i8_kernel_preload_arg: +; GFX940-NO-PRELOAD: ; %bb.0: +; GFX940-NO-PRELOAD-NEXT: s_load_dword s6, s[0:1], 0x8 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 +; GFX940-NO-PRELOAD-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x10 +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s6 +; GFX940-NO-PRELOAD-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: global_store_short_d16_hi v0, v1, s[4:5] sc0 sc1 +; GFX940-NO-PRELOAD-NEXT: s_endpgm +; +; GFX940-PRELOAD-2-LABEL: i16_v2i8_kernel_preload_arg: +; 
GFX940-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-2-NEXT: ; %bb.0: +; GFX940-PRELOAD-2-NEXT: s_load_dword s5, s[0:1], 0x8 +; GFX940-PRELOAD-2-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x10 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-PRELOAD-2-NEXT: global_store_short v0, v1, s[2:3] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s5 +; GFX940-PRELOAD-2-NEXT: global_store_short_d16_hi v0, v1, s[6:7] sc0 sc1 +; GFX940-PRELOAD-2-NEXT: s_endpgm +; +; GFX940-PRELOAD-8-LABEL: i16_v2i8_kernel_preload_arg: +; GFX940-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX940-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX940-PRELOAD-8-NEXT: ; %bb.0: +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 24 +; GFX940-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX940-PRELOAD-8-NEXT: s_lshr_b32 s0, s4, 16 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0 +; GFX940-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s4 +; GFX940-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX940-PRELOAD-8-NEXT: global_store_short v1, v2, s[2:3] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: global_store_short v1, v0, s[6:7] sc0 sc1 +; GFX940-PRELOAD-8-NEXT: s_endpgm +; +; GFX90a-NO-PRELOAD-LABEL: i16_v2i8_kernel_preload_arg: +; GFX90a-NO-PRELOAD: ; %bb.0: +; GFX90a-NO-PRELOAD-NEXT: s_load_dword s6, s[4:5], 0x8 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX90a-NO-PRELOAD-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-NO-PRELOAD-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-NO-PRELOAD-NEXT: v_mov_b32_e32 v1, s6 +; GFX90a-NO-PRELOAD-NEXT: global_store_short 
v0, v1, s[0:1] +; GFX90a-NO-PRELOAD-NEXT: global_store_short_d16_hi v0, v1, s[2:3] +; GFX90a-NO-PRELOAD-NEXT: s_endpgm +; +; GFX90a-PRELOAD-2-LABEL: i16_v2i8_kernel_preload_arg: +; GFX90a-PRELOAD-2: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-2-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-2-NEXT: ; %bb.0: +; GFX90a-PRELOAD-2-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX90a-PRELOAD-2-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v0, 0 +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s8 +; GFX90a-PRELOAD-2-NEXT: global_store_short v0, v1, s[6:7] +; GFX90a-PRELOAD-2-NEXT: s_waitcnt lgkmcnt(0) +; GFX90a-PRELOAD-2-NEXT: v_mov_b32_e32 v1, s2 +; GFX90a-PRELOAD-2-NEXT: global_store_short_d16_hi v0, v1, s[0:1] +; GFX90a-PRELOAD-2-NEXT: s_endpgm +; +; GFX90a-PRELOAD-8-LABEL: i16_v2i8_kernel_preload_arg: +; GFX90a-PRELOAD-8: s_trap 2 ; Kernarg preload header. Trap with incompatible firmware that doesn't support preloading kernel arguments. +; GFX90a-PRELOAD-8-NEXT: .fill 63, 4, 0xbf800000 ; s_nop 0 +; GFX90a-PRELOAD-8-NEXT: ; %bb.0: +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 24 +; GFX90a-PRELOAD-8-NEXT: v_lshlrev_b16_e64 v0, 8, s0 +; GFX90a-PRELOAD-8-NEXT: s_lshr_b32 s0, s8, 16 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v1, 0 +; GFX90a-PRELOAD-8-NEXT: v_mov_b32_e32 v2, s8 +; GFX90a-PRELOAD-8-NEXT: v_or_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX90a-PRELOAD-8-NEXT: global_store_short v1, v2, s[6:7] +; GFX90a-PRELOAD-8-NEXT: global_store_short v1, v0, s[10:11] +; GFX90a-PRELOAD-8-NEXT: s_endpgm + store i16 %in, ptr addrspace(1) %out + store <2 x i8> %in2, ptr addrspace(1) %out2 + ret void +}