diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 909b0e2f1c9861..0f84acba087b77 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -387,8 +387,7 @@ bool GCNTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, // them later if they may access private memory. We don't have enough context // here, and legalization can handle it. if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) { - return (Alignment >= 4 || ST->hasUnalignedScratchAccessEnabled() || - ST->enableFlatScratch()) && + return (Alignment >= 4 || ST->hasUnalignedScratchAccessEnabled()) && ChainSizeInBytes <= ST->getMaxPrivateElementSize(); } return true; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 25af9803040bac..bc4c65fcf8425b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1841,8 +1841,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( if (IsFast) *IsFast = AlignedBy4; - return AlignedBy4 || Subtarget->enableFlatScratch() || - Subtarget->hasUnalignedScratchAccessEnabled(); + return AlignedBy4 || Subtarget->hasUnalignedScratchAccessEnabled(); } // So long as they are correct, wide global memory operations perform better diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index 688146a6000e73..d7e3db17404ea8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -2432,23 +2432,93 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_mov_b32_e32 v1, 15 -; GFX12-NEXT: v_mov_b32_e32 v2, 0 +; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0 ; GFX12-NEXT: s_wait_storecnt 0x0 -; GFX12-NEXT: scratch_store_b64 v0, v[1:2], off scope:SCOPE_SYS +; GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 -; GFX12-NEXT: scratch_load_b64 v[0:1], v0, off scope:SCOPE_SYS +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:4 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v0, v0, off offset:7 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX9-LABEL: store_load_i64_unaligned: ; UNALIGNED_GFX9: ; %bb.0: ; %bb ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 15 -; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v2, 0 -; UNALIGNED_GFX9-NEXT: scratch_store_dwordx2 v0, v[1:2], off +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v4, 15 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, 4, v0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v2, 2, v0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v0, v4, off ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX9-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v4, 0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v6, 6, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v3, v4, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v5, 3, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v2, v4, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v5, v4, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v7, 5, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v1, v4, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v7, v4, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v8, 7, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v6, v4, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v8, v4, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v0, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr6 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr1 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr3 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr5 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr8 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr7 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr2 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr0 +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v3, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v2, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v5, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v1, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v7, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v6, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v4, v8, off glc ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -2457,30 +2527,143 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) { ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15 ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 0 -; UNALIGNED_GFX10-NEXT: scratch_store_dwordx2 v0, v[1:2], off +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v3, 4, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v5, 2, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v0, v1, off ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; UNALIGNED_GFX10-NEXT: scratch_load_dwordx2 v[0:1], v0, off glc dlc +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, 3, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v6, 5, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v7, 6, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v8, 7, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v4, v2, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v5, v2, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v1, v2, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v3, v2, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v6, v2, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v7, v2, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v8, v2, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v0, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v4, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v5, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v1, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v3, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v6, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v7, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v8, off glc dlc ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX940-LABEL: store_load_i64_unaligned: ; UNALIGNED_GFX940: ; %bb.0: ; %bb ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: v_mov_b64_e32 v[2:3], 15 -; UNALIGNED_GFX940-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1 +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v4, 15 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 4, v0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v2, 2, v0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v3, 1, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v0, v4, off sc0 sc1 ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v4, 0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v6, 6, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v3, v4, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v5, 3, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v2, v4, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v5, v4, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v7, 5, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v1, v4, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v7, v4, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v8, 7, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v6, v4, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v8, v4, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v0, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr6 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr1 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr3 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr5 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr8 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr7 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr2 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr0 +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v2, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v5, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v1, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v7, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v6, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v4, v8, off sc0 sc1 ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: store_load_i64_unaligned: ; UNALIGNED_GFX11: ; %bb.0: ; %bb ; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v1, 15 -; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v2, 0 -; UNALIGNED_GFX11-NEXT: scratch_store_b64 v0, v[1:2], off dlc +; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v4, 1, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v3, 4, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v5, 2, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v0, v1, off dlc ; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; UNALIGNED_GFX11-NEXT: scratch_load_b64 v[0:1], v0, off glc dlc +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v6, 5, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v7, 6, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v8, 7, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v4, v2, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v5, v2, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v1, v2, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v3, v2, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v6, v2, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v7, v2, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v8, v2, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v0, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v4, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v5, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v1, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v3, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v6, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v7, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v8, off glc dlc ; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2491,12 +2674,39 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) { ; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 ; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 ; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 -; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v1, 15 -; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v2, 0 +; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0 ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 -; UNALIGNED_GFX12-NEXT: scratch_store_b64 v0, v[1:2], off scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 -; UNALIGNED_GFX12-NEXT: scratch_load_b64 v[0:1], v0, off scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:4 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v0, v0, off offset:7 scope:SCOPE_SYS ; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 ; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] bb: @@ -2572,75 +2782,350 @@ define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_mov_b32 s2, 3 -; GFX12-NEXT: s_mov_b32 s1, 2 -; GFX12-NEXT: s_mov_b32 s0, 1 -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: v_dual_mov_b32 v3, s2 :: v_dual_mov_b32 v2, s1 -; GFX12-NEXT: v_mov_b32_e32 v1, s0 +; GFX12-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 0 +; GFX12-NEXT: v_mov_b32_e32 v3, 2 ; GFX12-NEXT: s_wait_storecnt 0x0 -; GFX12-NEXT: scratch_store_b96 v0, v[1:3], off scope:SCOPE_SYS +; GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 -; GFX12-NEXT: scratch_load_b96 v[0:2], v0, off scope:SCOPE_SYS +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: v_mov_b32_e32 v1, 3 +; GFX12-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v1, off offset:8 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:9 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:10 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:11 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:7 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:8 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:9 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:10 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v0, v0, off offset:11 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX9-LABEL: store_load_v3i32_unaligned: ; UNALIGNED_GFX9: ; %bb.0: ; %bb ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX9-NEXT: s_mov_b32 s2, 3 -; UNALIGNED_GFX9-NEXT: s_mov_b32 s1, 2 -; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 1 -; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v3, s2 -; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v2, s1 -; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, s0 -; UNALIGNED_GFX9-NEXT: scratch_store_dwordx3 v0, v[1:3], off +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v3, 1 +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 2 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v2, 2, v0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v4, 1, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v0, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v3, 0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v6, 4, v0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v7, 6, v0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v9, 8, v0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v10, 10, v0 +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v12, 3 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v4, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v5, 3, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v2, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v5, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v8, 5, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v6, v1, off ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX9-NEXT: scratch_load_dwordx3 v[0:2], v0, off glc +; UNALIGNED_GFX9-NEXT: scratch_store_byte v8, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, 7, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v7, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v1, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v11, 9, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v9, v12, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v11, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v12, 11, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v10, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v12, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v0, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr5 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr9 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr1 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr8 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr2 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr12 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr4 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr11 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr7 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr6 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr10 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr0 +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v4, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v2, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v5, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v6, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v8, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v7, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v1, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v9, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v11, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v10, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v12, off glc ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX10-LABEL: store_load_v3i32_unaligned: ; UNALIGNED_GFX10: ; %bb.0: ; %bb ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX10-NEXT: s_mov_b32 s2, 3 -; UNALIGNED_GFX10-NEXT: s_mov_b32 s1, 2 -; UNALIGNED_GFX10-NEXT: s_mov_b32 s0, 1 -; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v3, s2 -; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, s1 -; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, s0 -; UNALIGNED_GFX10-NEXT: scratch_store_dwordx3 v0, v[1:3], off +; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 1 +; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v3, 0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v0 +; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 2 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v4, 2, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v0, v1, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, 3, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v6, 4, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v7, 5, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v5, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v4, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v8, 6, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v1, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v6, v2, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v7, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v2, 7, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v9, 8, v0 +; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v10, 3 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v11, 9, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v12, 10, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v13, 11, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v8, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v2, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v9, v10, off ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; UNALIGNED_GFX10-NEXT: scratch_load_dwordx3 v[0:2], v0, off glc dlc +; UNALIGNED_GFX10-NEXT: scratch_store_byte v11, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v12, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v13, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v0, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v5, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v4, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v1, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v6, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v7, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v8, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v2, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v9, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v11, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v12, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v13, off glc dlc ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX940-LABEL: store_load_v3i32_unaligned: ; UNALIGNED_GFX940: ; %bb.0: ; %bb ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: s_mov_b32 s2, 3 -; UNALIGNED_GFX940-NEXT: s_mov_b32 s1, 2 -; UNALIGNED_GFX940-NEXT: s_mov_b32 s0, 1 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v4, s2 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, s1 -; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v2, s0 -; UNALIGNED_GFX940-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1 +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 1 +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 2 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v2, 2, v0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v4, 1, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v0, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v6, 4, v0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v7, 6, v0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v9, 8, v0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v10, 10, v0 +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v12, 3 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v4, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v5, 3, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v2, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v5, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v8, 5, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v6, v1, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v8, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 7, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v7, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v1, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v11, 9, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v9, v12, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v11, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v12, 11, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v10, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v12, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v0, off sc0 sc1 ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr5 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr9 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr1 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr8 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr2 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr12 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr4 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr11 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr7 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr6 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr10 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr0 +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v4, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v2, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v5, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v6, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v8, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v7, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v1, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v9, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v11, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v10, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v12, off sc0 sc1 ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: store_load_v3i32_unaligned: ; UNALIGNED_GFX11: ; %bb.0: ; %bb ; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX11-NEXT: s_mov_b32 s2, 3 -; UNALIGNED_GFX11-NEXT: s_mov_b32 s1, 2 -; UNALIGNED_GFX11-NEXT: s_mov_b32 s0, 1 -; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v3, s2 :: v_dual_mov_b32 v2, s1 -; UNALIGNED_GFX11-NEXT: v_mov_b32_e32 v1, s0 -; UNALIGNED_GFX11-NEXT: scratch_store_b96 v0, v[1:3], off dlc +; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 +; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_add_nc_u32 v4, 2, v0 +; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v10, 3 :: v_dual_add_nc_u32 v5, 1, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v0, v1, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v6, 4, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v7, 5, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v5, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v4, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v8, 6, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v1, v3, off dlc ; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; UNALIGNED_GFX11-NEXT: scratch_load_b96 v[0:2], v0, off glc dlc +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v6, v2, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v7, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v2, 7, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v9, 8, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v11, 9, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v12, 10, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v13, 11, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v8, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v2, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v9, v10, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v11, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v12, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v13, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v0, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v5, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v4, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v1, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v6, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v7, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v8, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v2, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v9, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v11, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v12, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v13, off glc dlc ; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2651,16 +3136,57 @@ define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) { ; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 ; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 ; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 -; UNALIGNED_GFX12-NEXT: s_mov_b32 s2, 3 -; UNALIGNED_GFX12-NEXT: s_mov_b32 s1, 2 -; UNALIGNED_GFX12-NEXT: s_mov_b32 s0, 1 -; UNALIGNED_GFX12-NEXT: s_wait_alu 0xfffe -; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v3, s2 :: v_dual_mov_b32 v2, s1 -; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v1, s0 +; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 0 +; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v3, 2 +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v1, 3 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 -; UNALIGNED_GFX12-NEXT: scratch_store_b96 v0, v[1:3], off scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v1, off offset:8 scope:SCOPE_SYS ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 -; UNALIGNED_GFX12-NEXT: scratch_load_b96 v[0:2], v0, off scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:9 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:10 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:11 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:7 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:8 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:9 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:10 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v0, v0, off offset:11 scope:SCOPE_SYS ; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 ; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] bb: @@ -2741,81 +3267,456 @@ define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_mov_b32 s3, 4 -; GFX12-NEXT: s_mov_b32 s2, 3 -; GFX12-NEXT: s_mov_b32 s1, 2 -; GFX12-NEXT: s_mov_b32 s0, 1 -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 -; GFX12-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 +; GFX12-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 0 +; GFX12-NEXT: v_mov_b32_e32 v3, 2 +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: v_mov_b32_e32 v1, 3 +; GFX12-NEXT: v_mov_b32_e32 v3, 4 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v1, off offset:8 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:9 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:10 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:11 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 -; GFX12-NEXT: scratch_store_b128 v0, v[1:4], off scope:SCOPE_SYS +; GFX12-NEXT: scratch_store_b8 v0, v3, off offset:12 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 -; GFX12-NEXT: scratch_load_b128 v[0:3], v0, off scope:SCOPE_SYS +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:13 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:14 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:15 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:7 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:8 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:9 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:10 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:11 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:12 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:13 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:14 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: scratch_load_u8 v0, v0, off offset:15 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX9-LABEL: store_load_v4i32_unaligned: ; UNALIGNED_GFX9: ; %bb.0: ; %bb ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX9-NEXT: s_mov_b32 s3, 4 -; UNALIGNED_GFX9-NEXT: s_mov_b32 s2, 3 -; UNALIGNED_GFX9-NEXT: s_mov_b32 s1, 2 -; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 1 -; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v4, s3 -; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v3, s2 -; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v2, s1 -; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, s0 -; UNALIGNED_GFX9-NEXT: scratch_store_dwordx4 v0, v[1:4], off -; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX9-NEXT: scratch_load_dwordx4 v[0:3], v0, off glc +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v3, 1 +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v1, 2 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v2, 2, v0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v4, 1, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v0, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v3, 0 +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v6, 4 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v7, 4, v0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v8, 6, v0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v10, 8, v0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v11, 10, v0 +; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v13, 3 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v14, 12, v0 +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v15, 14, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v4, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v5, 3, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v2, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v5, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v9, 5, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v7, v1, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v9, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v1, 7, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v8, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v1, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v12, 9, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v10, v13, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v12, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v13, 11, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v11, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v13, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v16, 13, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v14, v6, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v16, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: v_add_u32_e32 v6, 15, v0 +; UNALIGNED_GFX9-NEXT: scratch_store_byte v15, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_store_byte v6, v3, off +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v0, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v4, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v2, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v5, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v7, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v9, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v8, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v1, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v10, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v12, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v11, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v13, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v14, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v16, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v3, v15, off glc +; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr11 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr4 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr15 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr10 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr7 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr13 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr5 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr14 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr12 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr8 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr2 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr1 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr9 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr16 +; UNALIGNED_GFX9-NEXT: ; kill: killed $vgpr0 +; UNALIGNED_GFX9-NEXT: scratch_load_ubyte v0, v6, off glc ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX9-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX10-LABEL: store_load_v4i32_unaligned: ; UNALIGNED_GFX10: ; %bb.0: ; %bb ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX10-NEXT: s_mov_b32 s3, 4 -; UNALIGNED_GFX10-NEXT: s_mov_b32 s2, 3 -; UNALIGNED_GFX10-NEXT: s_mov_b32 s1, 2 -; UNALIGNED_GFX10-NEXT: s_mov_b32 s0, 1 -; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v4, s3 -; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v3, s2 -; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, s1 -; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, s0 -; UNALIGNED_GFX10-NEXT: scratch_store_dwordx4 v0, v[1:4], off +; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 1 +; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v2, 2 +; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v3, 0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v6, 4, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v0, v1, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v1, 3, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v5, 2, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v7, 5, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v4, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v5, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v1, v3, off ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; UNALIGNED_GFX10-NEXT: scratch_load_dwordx4 v[0:3], v0, off glc dlc +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v9, 6, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v6, v2, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v7, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v2, 7, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v10, 8, v0 +; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v11, 3 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v12, 9, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v9, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v13, 10, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v2, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v10, v11, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v12, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v11, 11, v0 +; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v8, 4 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v14, 12, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v15, 13, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v16, 14, v0 +; UNALIGNED_GFX10-NEXT: v_add_nc_u32_e32 v17, 15, v0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v13, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v11, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v14, v8, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v15, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v16, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_store_byte v17, v3, off +; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v0, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v4, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v5, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v1, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v6, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v7, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v9, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v2, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v10, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v12, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v13, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v11, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v14, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v15, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v16, off glc dlc +; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX10-NEXT: scratch_load_ubyte v0, v17, off glc dlc ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX10-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX940-LABEL: store_load_v4i32_unaligned: ; UNALIGNED_GFX940: ; %bb.0: ; %bb ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX940-NEXT: s_mov_b32 s3, 4 -; UNALIGNED_GFX940-NEXT: s_mov_b32 s2, 3 -; UNALIGNED_GFX940-NEXT: s_mov_b32 s1, 2 -; UNALIGNED_GFX940-NEXT: s_mov_b32 s0, 1 -; UNALIGNED_GFX940-NEXT: v_mov_b64_e32 v[4:5], s[2:3] -; UNALIGNED_GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] -; UNALIGNED_GFX940-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1 +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 1 +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v1, 2 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v2, 2, v0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v4, 1, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v0, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v3, 0 +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v6, 4 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v7, 4, v0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v8, 6, v0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v10, 8, v0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v11, 10, v0 +; UNALIGNED_GFX940-NEXT: v_mov_b32_e32 v13, 3 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v14, 12, v0 +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v15, 14, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v4, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v5, 3, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v2, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v5, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v9, 5, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v7, v1, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v9, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v1, 7, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v8, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v1, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v12, 9, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v10, v13, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v12, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v13, 11, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v11, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v13, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v16, 13, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v14, v6, off sc0 sc1 ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) -; UNALIGNED_GFX940-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v16, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: v_add_u32_e32 v6, 15, v0 +; UNALIGNED_GFX940-NEXT: scratch_store_byte v15, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_store_byte v6, v3, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v0, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v4, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v2, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v5, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v7, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v9, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v8, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v1, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v10, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v12, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v11, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v13, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v14, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v16, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v3, v15, off sc0 sc1 +; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr11 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr4 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr15 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr10 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr7 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr13 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr5 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr14 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr12 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr8 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr2 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr1 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr9 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr16 +; UNALIGNED_GFX940-NEXT: ; kill: killed $vgpr0 +; UNALIGNED_GFX940-NEXT: scratch_load_ubyte v0, v6, off sc0 sc1 ; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX940-NEXT: s_setpc_b64 s[30:31] ; ; UNALIGNED_GFX11-LABEL: store_load_v4i32_unaligned: ; UNALIGNED_GFX11: ; %bb.0: ; %bb ; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; UNALIGNED_GFX11-NEXT: s_mov_b32 s3, 4 -; UNALIGNED_GFX11-NEXT: s_mov_b32 s2, 3 -; UNALIGNED_GFX11-NEXT: s_mov_b32 s1, 2 -; UNALIGNED_GFX11-NEXT: s_mov_b32 s0, 1 -; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 -; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 -; UNALIGNED_GFX11-NEXT: scratch_store_b128 v0, v[1:4], off dlc +; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2 +; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_add_nc_u32 v4, 1, v0 +; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v11, 3 :: v_dual_add_nc_u32 v6, 4, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v0, v1, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v1, 3, v0 +; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v8, 4 :: v_dual_add_nc_u32 v5, 2, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v7, 5, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v4, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v5, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v1, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v9, 6, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v6, v2, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v7, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v2, 7, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v10, 8, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v12, 9, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v9, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v13, 10, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v2, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v10, v11, off dlc ; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; UNALIGNED_GFX11-NEXT: scratch_load_b128 v[0:3], v0, off glc dlc +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v12, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v11, 11, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v14, 12, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v15, 13, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v16, 14, v0 +; UNALIGNED_GFX11-NEXT: v_add_nc_u32_e32 v17, 15, v0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v13, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v11, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v14, v8, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v15, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v16, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_store_b8 v17, v3, off dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v0, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v4, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v5, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v1, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v6, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v7, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v9, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v2, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v10, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v12, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v13, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v11, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v14, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v15, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v16, off glc dlc +; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX11-NEXT: scratch_load_u8 v0, v17, off glc dlc ; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -2826,17 +3727,74 @@ define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) { ; UNALIGNED_GFX12-NEXT: s_wait_samplecnt 0x0 ; UNALIGNED_GFX12-NEXT: s_wait_bvhcnt 0x0 ; UNALIGNED_GFX12-NEXT: s_wait_kmcnt 0x0 -; UNALIGNED_GFX12-NEXT: s_mov_b32 s3, 4 -; UNALIGNED_GFX12-NEXT: s_mov_b32 s2, 3 -; UNALIGNED_GFX12-NEXT: s_mov_b32 s1, 2 -; UNALIGNED_GFX12-NEXT: s_mov_b32 s0, 1 -; UNALIGNED_GFX12-NEXT: s_wait_alu 0xfffe -; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 -; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 +; UNALIGNED_GFX12-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 0 +; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v3, 2 +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 -; UNALIGNED_GFX12-NEXT: scratch_store_b128 v0, v[1:4], off scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS ; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 -; UNALIGNED_GFX12-NEXT: scratch_load_b128 v[0:3], v0, off scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v1, 3 +; UNALIGNED_GFX12-NEXT: v_mov_b32_e32 v3, 4 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v1, off offset:8 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:9 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:10 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:11 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v3, off offset:12 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:13 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:14 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_store_b8 v0, v2, off offset:15 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_storecnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:7 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:8 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:9 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:10 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:11 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:12 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:13 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v1, v0, off offset:14 scope:SCOPE_SYS +; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 +; UNALIGNED_GFX12-NEXT: scratch_load_u8 v0, v0, off offset:15 scope:SCOPE_SYS ; UNALIGNED_GFX12-NEXT: s_wait_loadcnt 0x0 ; UNALIGNED_GFX12-NEXT: s_setpc_b64 s[30:31] bb: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index 161099d1e7e590..97710d4c1e8d39 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -497,8 +497,14 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s16_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -517,15 +523,27 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 0) %2:_(s32) = G_ANYEXT %1 @@ -648,8 +666,14 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s32_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -668,15 +692,27 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s32_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 0) $vgpr0 = COPY %1 @@ -752,8 +788,24 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s32_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -782,15 +834,47 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR2]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s32_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 0) $vgpr0 = COPY %1 @@ -1121,8 +1205,26 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 2) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s64_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -1153,15 +1255,51 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 2) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s64_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 2) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -1269,8 +1407,44 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 1) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s64_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -1319,15 +1493,87 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s64_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -1681,8 +1927,29 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 2) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s96_align2 @@ -1718,16 +1985,58 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s96_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 2) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 0) @@ -1870,8 +2179,55 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 1) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s96_align1 @@ -1933,16 +2289,110 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s96_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p0) :: (load (<3 x s32>), align 1) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 0) @@ -2600,8 +3050,70 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_s128_align1 @@ -2678,16 +3190,140 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_s128_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 0) @@ -3086,8 +3722,45 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 1) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_p1_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -3137,15 +3810,89 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_p1_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p0) :: (load (p1), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -3431,8 +4178,27 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 2) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_p4_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -3464,15 +4230,53 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 2) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_p4_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 2) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR2]](s64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load (p4), align 2, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -3580,8 +4384,45 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 1) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_p4_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -3631,15 +4472,89 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_p4_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p0) :: (load (p4), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p4) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p4) = G_LOAD %0 :: (load (p4), align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -3763,8 +4678,15 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5), align 2) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_p5_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -3784,15 +4706,29 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5), align 2) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_p5_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5), align 2) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 0) $vgpr0 = COPY %1 @@ -3870,8 +4806,25 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5), align 1) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_p5_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -3901,15 +4854,49 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_p5_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p0) :: (load (p5), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 0) $vgpr0 = COPY %1 @@ -4101,8 +5088,14 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s8_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -4121,15 +5114,27 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 0) %2:_(s16) = G_BITCAST %1 @@ -4553,36 +5558,41 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 2) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] + ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX12-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR4]](s32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s8_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -4628,71 +5638,81 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX11PLUS-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR4]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16), align 1) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 2) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] + ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR4]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 0) %2:_(s24) = G_BITCAST %1 @@ -4817,8 +5837,14 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s8_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -4837,15 +5863,27 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s8_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 2) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 2, addrspace 0) %2:_(s32) = G_BITCAST %1 @@ -4922,8 +5960,24 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s8_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -4952,15 +6006,47 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[OR2]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s8>) = G_LOAD %0 :: (load (<4 x s8>), align 1, addrspace 0) %2:_(s32) = G_BITCAST %1 @@ -5389,8 +6475,14 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 2) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s16_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -5409,15 +6501,27 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 2) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 2) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 0) $vgpr0 = COPY %1 @@ -5501,8 +6605,24 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 1) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s16_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -5531,15 +6651,47 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p0) :: (load (<2 x s16>), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 0) $vgpr0 = COPY %1 @@ -6429,22 +7581,36 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2, align 1) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) @@ -6501,22 +7667,36 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2, align 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) @@ -6530,22 +7710,36 @@ body: | ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16), align 1) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2, align 1) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4, align 1) - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) @@ -6789,8 +7983,24 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 2) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s16_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -6819,15 +8029,47 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 2) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 2) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -6949,8 +8191,42 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 1) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s16_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -6997,15 +8273,83 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 @@ -8460,8 +9804,45 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 2) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s64_align2 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -8511,15 +9892,89 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 2) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s64_align2 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 2) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s16)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR3]](s32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR4]](s32) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s64) = G_OR [[SHL5]], [[ZEXT1]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR2]](s64), [[OR5]](s64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 2, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -8689,8 +10144,79 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX12-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2s64_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -8774,15 +10300,157 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2s64_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 0) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -9326,14 +10994,114 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1) - ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX12-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX12-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) + ; GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) + ; GFX12-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) + ; GFX12-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) + ; GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) + ; GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) + ; GFX12-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; GFX12-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) + ; GFX12-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] + ; GFX12-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) + ; GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) + ; GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) + ; GFX12-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) + ; GFX12-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) + ; GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) + ; GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) + ; GFX12-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; GFX12-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; GFX12-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) + ; GFX12-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] + ; GFX12-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX12-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX12-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v3s64_align1 @@ -9454,28 +11222,228 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; UNALIGNED_GFX11PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11PLUS-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v3s64_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 1) - ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX12-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) + ; UNALIGNED_GFX12-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) + ; UNALIGNED_GFX12-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX12-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) + ; UNALIGNED_GFX12-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) + ; UNALIGNED_GFX12-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX12-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) + ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 0) @@ -10048,11 +12016,147 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 1) - ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX12-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) + ; GFX12-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) + ; GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) + ; GFX12-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) + ; GFX12-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) + ; GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) + ; GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) + ; GFX12-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; GFX12-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) + ; GFX12-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] + ; GFX12-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) + ; GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) + ; GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) + ; GFX12-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) + ; GFX12-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) + ; GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) + ; GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) + ; GFX12-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; GFX12-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; GFX12-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) + ; GFX12-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] + ; GFX12-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) + ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX12-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; GFX12-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] + ; GFX12-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) + ; GFX12-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) + ; GFX12-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25) + ; GFX12-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) + ; GFX12-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; GFX12-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26) + ; GFX12-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) + ; GFX12-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27) + ; GFX12-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) + ; GFX12-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; GFX12-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) + ; GFX12-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] + ; GFX12-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) + ; GFX12-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28) + ; GFX12-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29) + ; GFX12-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) + ; GFX12-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; GFX12-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30) + ; GFX12-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) + ; GFX12-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31) + ; GFX12-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; GFX12-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) + ; GFX12-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] + ; GFX12-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX12-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; GFX12-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR20]](s64), [[OR27]](s64) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v4s64_align1 @@ -10206,22 +12310,294 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] + ; UNALIGNED_GFX11PLUS-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR20]](s64), [[OR27]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v4s64_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 1) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p0) :: (load (<2 x s64>) from unknown-address + 16, align 1) - ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) + ; UNALIGNED_GFX12-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p0) :: (load (s8) from unknown-address + 16) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p0) :: (load (s8) from unknown-address + 17) + ; UNALIGNED_GFX12-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p0) :: (load (s8) from unknown-address + 18) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD17]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p0) :: (load (s8) from unknown-address + 19) + ; UNALIGNED_GFX12-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX12-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[OR15]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[OR14]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT2:%[0-9]+]]:_(s64) = G_ZEXT [[OR16]](s32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p0) :: (load (s8) from unknown-address + 20) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p0) :: (load (s8) from unknown-address + 21) + ; UNALIGNED_GFX12-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p0) :: (load (s8) from unknown-address + 22) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD21]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p0) :: (load (s8) from unknown-address + 23) + ; UNALIGNED_GFX12-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR18:%[0-9]+]]:_(s32) = G_OR [[SHL18]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX12-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[OR18]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR19:%[0-9]+]]:_(s32) = G_OR [[SHL19]], [[OR17]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[OR19]](s32) + ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD15]], [[C6]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD18:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD23]](p0) :: (load (s8) from unknown-address + 24) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD19:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD24]](p0) :: (load (s8) from unknown-address + 25) + ; UNALIGNED_GFX12-NEXT: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD19]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR21:%[0-9]+]]:_(s32) = G_OR [[SHL21]], [[ZEXTLOAD18]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD20:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD25]](p0) :: (load (s8) from unknown-address + 26) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD25]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p0) :: (load (s8) from unknown-address + 27) + ; UNALIGNED_GFX12-NEXT: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[LOAD6]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR22:%[0-9]+]]:_(s32) = G_OR [[SHL22]], [[ZEXTLOAD20]] + ; UNALIGNED_GFX12-NEXT: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[OR22]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR23:%[0-9]+]]:_(s32) = G_OR [[SHL23]], [[OR21]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT3:%[0-9]+]]:_(s64) = G_ZEXT [[OR23]](s32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD23]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD21:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD27]](p0) :: (load (s8) from unknown-address + 28) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD22:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD28]](p0) :: (load (s8) from unknown-address + 29) + ; UNALIGNED_GFX12-NEXT: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD22]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR24:%[0-9]+]]:_(s32) = G_OR [[SHL24]], [[ZEXTLOAD21]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD27]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD23:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD29]](p0) :: (load (s8) from unknown-address + 30) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD29]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p0) :: (load (s8) from unknown-address + 31) + ; UNALIGNED_GFX12-NEXT: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[LOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR25:%[0-9]+]]:_(s32) = G_OR [[SHL25]], [[ZEXTLOAD23]] + ; UNALIGNED_GFX12-NEXT: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[OR25]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR26:%[0-9]+]]:_(s32) = G_OR [[SHL26]], [[OR24]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[OR26]](s32) + ; UNALIGNED_GFX12-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[SHL27:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT3]], [[COPY3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR27:%[0-9]+]]:_(s64) = G_OR [[SHL27]], [[ZEXT3]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR20]](s64), [[OR27]](s64) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]](<2 x s64>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), align 1, addrspace 0) @@ -10818,8 +13194,70 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p1_align1 @@ -10896,16 +13334,140 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p1_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load (<4 x s32>), align 1) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p0) :: (load (s8) from unknown-address + 8) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p0) :: (load (s8) from unknown-address + 9) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p0) :: (load (s8) from unknown-address + 10) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD9]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p0) :: (load (s8) from unknown-address + 11) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p0) :: (load (s8) from unknown-address + 12) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p0) :: (load (s8) from unknown-address + 13) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p0) :: (load (s8) from unknown-address + 14) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD13]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p0) :: (load (s8) from unknown-address + 15) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x p1>) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x p1>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 1, addrspace 0) @@ -11166,8 +13728,42 @@ body: | ; GFX12: liveins: $vgpr0_vgpr1 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 1) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; ; UNALIGNED_GFX9PLUS-LABEL: name: test_load_flat_v2p3_align1 ; UNALIGNED_GFX9PLUS: liveins: $vgpr0_vgpr1 @@ -11214,15 +13810,83 @@ body: | ; UNALIGNED_GFX11PLUS: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX11PLUS-NEXT: {{ $}} ; UNALIGNED_GFX11PLUS-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 1) - ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX11PLUS-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX11PLUS-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11PLUS-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX11PLUS-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX11PLUS-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11PLUS-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11PLUS-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; UNALIGNED_GFX11PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) + ; UNALIGNED_GFX11PLUS-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_flat_v2p3_align1 ; UNALIGNED_GFX12: liveins: $vgpr0_vgpr1 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[COPY]](p0) :: (load (<2 x p3>), align 1) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p0) :: (load (s8) from unknown-address + 1) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p0) :: (load (s8) from unknown-address + 2) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s8) from unknown-address + 3) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p0) :: (load (s8) from unknown-address + 4) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p0) :: (load (s8) from unknown-address + 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p0) :: (load (s8) from unknown-address + 6) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD5]], [[C]](s64) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p0) :: (load (s8) from unknown-address + 7) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[INTTOPTR]](p3), [[INTTOPTR1]](p3) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<2 x p3>) = G_LOAD %0 :: (load (<2 x p3>), align 1, addrspace 0) $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index 3688f4b8a0f9ae..374a192fe6a4c3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -657,8 +657,14 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s16_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -690,15 +696,27 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s16) = G_LOAD %0 :: (load (s16), align 1, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -862,8 +880,14 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s32_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -895,15 +919,27 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s32_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), align 2, addrspace 5) $vgpr0 = COPY %1 @@ -1009,8 +1045,24 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s32_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -1062,15 +1114,47 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR2]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s32_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -1512,14 +1596,20 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR1]](s32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s24_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -1563,27 +1653,39 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR1]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s24_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR1]](s32) %0:_(p5) = COPY $vgpr0 %1:_(s24) = G_LOAD %0 :: (load (s24), align 1, addrspace 5) %2:_(s32) = G_ANYEXT %1 @@ -2108,8 +2210,26 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s64_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -2157,15 +2277,51 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s64_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR2]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s64), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -2327,8 +2483,44 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s64_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -2412,15 +2604,87 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s64_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p5) :: (load (s64), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[OR6]](s64) %0:_(p5) = COPY $vgpr0 %1:_(s64) = G_LOAD %0 :: (load (s64), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -2636,8 +2900,54 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s96_align16 @@ -2752,16 +3062,108 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s96_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 5) @@ -3197,8 +3599,29 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s96_align2 @@ -3263,16 +3686,58 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s96_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 2, addrspace 5) @@ -3489,8 +3954,54 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s96_align1 @@ -3605,16 +4116,108 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s96_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) %0:_(p5) = COPY $vgpr0 %1:_(s96) = G_LOAD %0 :: (load (s96), align 1, addrspace 5) @@ -3882,8 +4485,69 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s128_align16 @@ -4028,16 +4692,138 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s128_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 5) @@ -4542,8 +5328,36 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s128_align2 @@ -4622,16 +5436,72 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s128_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 2, addrspace 5) @@ -4899,8 +5769,69 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_s128_align1 @@ -5045,16 +5976,138 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_s128_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<4 x s32>) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](s128) %0:_(p5) = COPY $vgpr0 %1:_(s128) = G_LOAD %0 :: (load (s128), align 1, addrspace 5) @@ -5393,8 +6446,27 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_p1_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -5442,15 +6514,53 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_p1_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[SHL2]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR2]](s64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) %0:_(p5) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load (p1), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -5612,8 +6722,45 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_p1_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -5697,15 +6844,89 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_p1_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p5) :: (load (p1), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[OR6]](s64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p1) %0:_(p5) = COPY $vgpr0 %1:_(p1) = G_LOAD %0 :: (load (p1), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -5871,8 +7092,15 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_p3_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -5906,15 +7134,29 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_p3_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p5) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load (p3), align 2, addrspace 5) $vgpr0 = COPY %1 @@ -6023,8 +7265,25 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_p3_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -6078,15 +7337,49 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_p3_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[COPY]](p5) :: (load (p3), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p3) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p3) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p3) %0:_(p5) = COPY $vgpr0 %1:_(p3) = G_LOAD %0 :: (load (p3), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -6252,8 +7545,15 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_p5_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -6287,15 +7587,29 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_p5_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p5) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load (p5), align 2, addrspace 5) $vgpr0 = COPY %1 @@ -6404,8 +7718,25 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_p5_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -6459,15 +7790,49 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_p5_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[COPY]](p5) :: (load (p5), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](p5) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[INTTOPTR]](p5) %0:_(p5) = COPY $vgpr0 %1:_(p5) = G_LOAD %0 :: (load (p5), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -6647,10 +8012,15 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s8_align1 @@ -6687,20 +8057,30 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[LOAD]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LSHR]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[LSHR]](s32) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s8>) = G_LOAD %0 :: (load (<2 x s8>), align 1, addrspace 5) @@ -7293,36 +8673,41 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) + ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] + ; GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX12-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX12-NEXT: $vgpr0 = COPY [[OR4]](s32) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s8_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -7408,71 +8793,81 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; UNALIGNED_GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX11-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] + ; UNALIGNED_GFX11-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; UNALIGNED_GFX11-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; UNALIGNED_GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; UNALIGNED_GFX11-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; UNALIGNED_GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[OR4]](s32) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s8_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C2]](s32) - ; UNALIGNED_GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[OR1]], [[C3]](s32) ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; UNALIGNED_GFX12-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL1]] + ; UNALIGNED_GFX12-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C5]](s16) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; UNALIGNED_GFX12-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) - ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL2]] - ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR3]](s32) + ; UNALIGNED_GFX12-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C5]](s16) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL3]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[OR4]](s32) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s8>) = G_LOAD %0 :: (load (<3 x s8>), align 1, addrspace 5) %2:_(s24) = G_BITCAST %1 @@ -7939,8 +9334,69 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v16s8_align16 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -8082,15 +9538,137 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v16s8_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<16 x s8>) = G_LOAD %0 :: (load (<16 x s8>), align 1, addrspace 5) %2:_(<4 x s32>) = G_BITCAST %1 @@ -8266,8 +9844,14 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s16_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -8299,15 +9883,27 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 2, addrspace 5) $vgpr0 = COPY %1 @@ -8425,8 +10021,24 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s16_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -8478,15 +10090,47 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX11-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load (<2 x s16>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX12-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s16>) = G_LOAD %0 :: (load (<2 x s16>), align 1, addrspace 5) $vgpr0 = COPY %1 @@ -9392,22 +11036,36 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, align 1, addrspace 5) - ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5) - ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) ; GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) @@ -9507,22 +11165,36 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) ; UNALIGNED_GFX11-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNALIGNED_GFX11-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) ; UNALIGNED_GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; UNALIGNED_GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) @@ -9536,22 +11208,36 @@ body: | ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) - ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) ; UNALIGNED_GFX12-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNALIGNED_GFX12-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) ; UNALIGNED_GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; UNALIGNED_GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) @@ -9918,8 +11604,24 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s16_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -9969,15 +11671,47 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s16_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -10160,8 +11894,42 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s16_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -10247,15 +12015,83 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; UNALIGNED_GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s16_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load (<4 x s16>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; UNALIGNED_GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s16>) = G_LOAD %0 :: (load (<4 x s16>), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -10593,8 +12429,22 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s32_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -10642,15 +12492,43 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s32_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 2, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -10812,8 +12690,40 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s32_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -10897,15 +12807,79 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s32_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p5) :: (load (<2 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s32>) = G_LOAD %0 :: (load (<2 x s32>), align 1, addrspace 5) $vgpr0_vgpr1 = COPY %1 @@ -11115,8 +13089,54 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v3s32_align16 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -11228,15 +13248,107 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v3s32_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -11633,8 +13745,69 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s32_align16 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -11776,15 +13949,137 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s32_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -12260,8 +14555,36 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s32_align2 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -12337,15 +14660,71 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s32_align2 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 2, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -12606,8 +14985,69 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v4s32_align1 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -12749,15 +15189,137 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v4s32_align1 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p5) :: (load (<4 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(p5) = COPY $vgpr0 %1:_(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -13893,8 +16455,78 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 1, addrspace 5) - ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; GFX12-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) + ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; ; UNALIGNED_GFX9-LABEL: name: test_load_private_v2s64_align16 ; UNALIGNED_GFX9: liveins: $vgpr0 @@ -14038,15 +16670,155 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX11-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX11-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) + ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; ; UNALIGNED_GFX12-LABEL: name: test_load_private_v2s64_align16 ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load (<2 x s64>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s64) = G_OR [[SHL6]], [[ZEXT]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[OR8]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[OR7]] + ; UNALIGNED_GFX12-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[OR9]](s32) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[OR11]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[OR10]] + ; UNALIGNED_GFX12-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[OR12]](s32) + ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[COPY1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(s64) = G_OR [[SHL13]], [[ZEXT1]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64) + ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(p5) = COPY $vgpr0 %1:_(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 1, addrspace 5) $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -16733,12 +19505,99 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 5) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) + ; GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) + ; GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) + ; GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) + ; GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; GFX12-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) + ; GFX12-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] + ; GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) + ; GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) + ; GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) + ; GFX12-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) + ; GFX12-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) + ; GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) + ; GFX12-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; GFX12-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; GFX12-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) + ; GFX12-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -16952,12 +19811,99 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX11-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; UNALIGNED_GFX11-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX11-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX11-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX11-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; UNALIGNED_GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -16967,12 +19913,99 @@ body: | ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s8), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 1, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p5) :: (load (s8) from unknown-address + 1, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s8) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s8) from unknown-address + 3, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s8) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD4]](p5) :: (load (s8) from unknown-address + 5, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s8) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s8) from unknown-address + 7, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[OR4]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[OR3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD6:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s8) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD7:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD8]](p5) :: (load (s8) from unknown-address + 9, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD7]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[SHL6]], [[ZEXTLOAD6]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD8:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s8) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s8) from unknown-address + 11, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[SHL7]], [[ZEXTLOAD8]] + ; UNALIGNED_GFX12-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[OR7]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[SHL8]], [[OR6]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD9:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD11]](p5) :: (load (s8) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD10:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD12]](p5) :: (load (s8) from unknown-address + 13, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD10]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[SHL9]], [[ZEXTLOAD9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD11:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD13]](p5) :: (load (s8) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD13]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load (s8) from unknown-address + 15, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SHL10]], [[ZEXTLOAD11]] + ; UNALIGNED_GFX12-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[OR10]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[SHL11]], [[OR9]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD12:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD15]](p5) :: (load (s8) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD13:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD16]](p5) :: (load (s8) from unknown-address + 17, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD13]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[SHL12]], [[ZEXTLOAD12]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD15]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD14:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD17]](p5) :: (load (s8) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD17]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load (s8) from unknown-address + 19, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[SHL13]], [[ZEXTLOAD14]] + ; UNALIGNED_GFX12-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[OR13]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[SHL14]], [[OR12]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD11]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD15:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD19]](p5) :: (load (s8) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD16:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD20]](p5) :: (load (s8) from unknown-address + 21, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD16]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[SHL15]], [[ZEXTLOAD15]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD19]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD17:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD21]](p5) :: (load (s8) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD21]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load (s8) from unknown-address + 23, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[SHL16]], [[ZEXTLOAD17]] + ; UNALIGNED_GFX12-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[OR16]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[SHL17]], [[OR15]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR11]](s32), [[OR14]](s32), [[OR17]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -17223,12 +20256,50 @@ body: | ; GFX12: liveins: $vgpr0 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) - ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 5) - ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) + ; GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) + ; GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) + ; GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) + ; GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) + ; GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -17344,12 +20415,50 @@ body: | ; UNALIGNED_GFX11: liveins: $vgpr0 ; UNALIGNED_GFX11-NEXT: {{ $}} ; UNALIGNED_GFX11-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; UNALIGNED_GFX11-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 5) - ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; UNALIGNED_GFX11-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX11-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX11-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX11-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; UNALIGNED_GFX11-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) + ; UNALIGNED_GFX11-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX11-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX11-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; UNALIGNED_GFX11-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; UNALIGNED_GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; UNALIGNED_GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; UNALIGNED_GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; UNALIGNED_GFX11-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) @@ -17359,12 +20468,50 @@ body: | ; UNALIGNED_GFX12: liveins: $vgpr0 ; UNALIGNED_GFX12-NEXT: {{ $}} ; UNALIGNED_GFX12-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p5) :: (load (<3 x s32>), align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD]](<3 x s32>) - ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p5) :: (load (s16), addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; UNALIGNED_GFX12-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p5) :: (load (<3 x s32>) from unknown-address + 12, align 2, addrspace 5) - ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[LOAD1]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (load (s16) from unknown-address + 2, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNALIGNED_GFX12-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] + ; UNALIGNED_GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p5) :: (load (s16) from unknown-address + 4, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (load (s16) from unknown-address + 6, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD1]] + ; UNALIGNED_GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD3]](p5) :: (load (s16) from unknown-address + 8, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s16) from unknown-address + 10, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LOAD2]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[ZEXTLOAD2]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; UNALIGNED_GFX12-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD3:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD5]](p5) :: (load (s16) from unknown-address + 12, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load (s16) from unknown-address + 14, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LOAD3]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[SHL3]], [[ZEXTLOAD3]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C2]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD4:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD7]](p5) :: (load (s16) from unknown-address + 16, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load (s16) from unknown-address + 18, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[LOAD4]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[SHL4]], [[ZEXTLOAD4]] + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD5]], [[C3]](s32) + ; UNALIGNED_GFX12-NEXT: [[ZEXTLOAD5:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD9]](p5) :: (load (s16) from unknown-address + 20, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD9]], [[C]](s32) + ; UNALIGNED_GFX12-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load (s16) from unknown-address + 22, addrspace 5) + ; UNALIGNED_GFX12-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LOAD5]], [[C1]](s32) + ; UNALIGNED_GFX12-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[SHL5]], [[ZEXTLOAD5]] + ; UNALIGNED_GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR3]](s32), [[OR4]](s32), [[OR5]](s32) + ; UNALIGNED_GFX12-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; UNALIGNED_GFX12-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; UNALIGNED_GFX12-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; UNALIGNED_GFX12-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll index fd2d0bc901f3fc..3fc5d0d4b279eb 100644 --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -475,8 +475,14 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_store_short off, v0, s0 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_load_dword v0, off, s0 -; FLATSCR-NEXT: scratch_load_dword v1, off, s0 offset:2 +; FLATSCR-NEXT: scratch_load_ushort v0, off, s0 offset:2 +; FLATSCR-NEXT: scratch_load_ushort v3, off, s0 +; FLATSCR-NEXT: s_waitcnt vmcnt(1) +; FLATSCR-NEXT: v_mov_b32_e32 v1, v0 +; FLATSCR-NEXT: scratch_load_short_d16_hi v1, off, s0 offset:4 +; FLATSCR-NEXT: s_mov_b32 s0, 0x5040100 +; FLATSCR-NEXT: s_waitcnt vmcnt(1) +; FLATSCR-NEXT: v_perm_b32 v0, v0, v3, s0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; FLATSCR-NEXT: s_endpgm @@ -537,8 +543,13 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i ; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s0 offset:4 ; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: s_clause 0x1 -; FLATSCR_GFX10-NEXT: scratch_load_dword v0, off, s0 -; FLATSCR_GFX10-NEXT: scratch_load_dword v1, off, s0 offset:2 +; FLATSCR_GFX10-NEXT: scratch_load_ushort v0, off, s0 offset:2 +; FLATSCR_GFX10-NEXT: scratch_load_ushort v3, off, s0 +; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(1) +; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v1, v0 +; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) +; FLATSCR_GFX10-NEXT: v_perm_b32 v0, v0, v3, 0x5040100 +; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v1, off, s0 offset:4 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) ; FLATSCR_GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; FLATSCR_GFX10-NEXT: s_endpgm @@ -561,8 +572,13 @@ define amdgpu_kernel void @vload2_private(ptr addrspace(1) nocapture readonly %i ; GFX11-NEXT: scratch_store_b16 off, v0, off offset:4 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: scratch_load_b32 v0, off, off -; GFX11-NEXT: scratch_load_b32 v1, off, off offset:2 +; GFX11-NEXT: scratch_load_u16 v0, off, off offset:2 +; GFX11-NEXT: scratch_load_u16 v3, off, off +; GFX11-NEXT: s_waitcnt vmcnt(1) +; GFX11-NEXT: v_mov_b32_e32 v1, v0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_perm_b32 v0, v0, v3, 0x5040100 +; GFX11-NEXT: scratch_load_d16_hi_b16 v1, off, off offset:4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[2:3] ; GFX11-NEXT: s_nop 0 @@ -703,12 +719,12 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) { ; FLATSCR-LABEL: chain_hi_to_lo_private_other_dep: ; FLATSCR: ; %bb.0: ; %bb ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: scratch_load_dword v0, v0, off -; FLATSCR-NEXT: s_mov_b32 s0, 0x7060302 +; FLATSCR-NEXT: scratch_load_short_d16_hi v1, v0, off ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: v_lshlrev_b32_e32 v1, 16, v0 ; FLATSCR-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0] -; FLATSCR-NEXT: v_perm_b32 v0, v1, v0, s0 +; FLATSCR-NEXT: scratch_load_short_d16 v1, v0, off offset:2 +; FLATSCR-NEXT: s_waitcnt vmcnt(0) +; FLATSCR-NEXT: v_mov_b32_e32 v0, v1 ; FLATSCR-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_DEFAULT-LABEL: chain_hi_to_lo_private_other_dep: @@ -725,22 +741,23 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) { ; FLATSCR_GFX10-LABEL: chain_hi_to_lo_private_other_dep: ; FLATSCR_GFX10: ; %bb.0: ; %bb ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR_GFX10-NEXT: scratch_load_dword v0, v0, off +; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v1, v0, off ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) -; FLATSCR_GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v0 ; FLATSCR_GFX10-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0] -; FLATSCR_GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x7060302 +; FLATSCR_GFX10-NEXT: scratch_load_short_d16 v1, v0, off offset:2 +; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) +; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v0, v1 ; FLATSCR_GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: chain_hi_to_lo_private_other_dep: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: scratch_load_b32 v0, v0, off +; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0] -; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x7060302 +; GFX11-NEXT: scratch_load_d16_b16 v1, v0, off offset:2 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %gep_lo = getelementptr inbounds i16, ptr addrspace(5) %ptr, i64 1 diff --git a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll index 4e734d6e0884bc..fc33a274d7b11a 100644 --- a/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll +++ b/llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll @@ -607,7 +607,14 @@ define amdgpu_kernel void @local_store_align1_v16i8(ptr addrspace(3) %out) #0 { ; MUBUF: buffer_load_ubyte ; MUBUF: buffer_load_ubyte ; MUBUF: buffer_load_ubyte -; FLATSCR: scratch_load_dwordx2 +; FLATSCR: scratch_load_ubyte +; FLATSCR: scratch_load_ubyte +; FLATSCR: scratch_load_ubyte +; FLATSCR: scratch_load_ubyte +; FLATSCR: scratch_load_ubyte +; FLATSCR: scratch_load_ubyte +; FLATSCR: scratch_load_ubyte +; FLATSCR: scratch_load_ubyte define double @private_load_align1_f64(ptr addrspace(5) %in) { %x = load double, ptr addrspace(5) %in, align 1 ret double %x @@ -622,7 +629,14 @@ define double @private_load_align1_f64(ptr addrspace(5) %in) { ; MUBUF: buffer_store_byte ; MUBUF: buffer_store_byte ; MUBUF: buffer_store_byte -; FLATSCR: scratch_store_dwordx2 +; FLATSCR: scratch_store_byte +; FLATSCR: scratch_store_byte +; FLATSCR: scratch_store_byte +; FLATSCR: scratch_store_byte +; FLATSCR: scratch_store_byte +; FLATSCR: scratch_store_byte +; FLATSCR: scratch_store_byte +; FLATSCR: scratch_store_byte define void @private_store_align1_f64(ptr addrspace(5) %out, double %x) #0 { store double %x, ptr addrspace(5) %out, align 1 ret void @@ -651,7 +665,10 @@ define void @private_store_align4_f64(ptr addrspace(5) %out, double %x) #0 { ; MUBUF: buffer_load_ushort ; MUBUF: buffer_load_ushort ; MUBUF: buffer_load_ushort -; FLATSCR: scratch_load_dwordx2 +; FLATSCR: scratch_load_ushort +; FLATSCR: scratch_load_ushort +; FLATSCR: scratch_load_ushort +; FLATSCR: scratch_load_ushort define double @private_load_align2_f64(ptr addrspace(5) %in) { %x = load double, ptr addrspace(5) %in, align 2 ret double %x @@ -662,7 +679,10 @@ define double @private_load_align2_f64(ptr addrspace(5) %in) { ; MUBUF: buffer_store_short ; MUBUF: buffer_store_short ; MUBUF: buffer_store_short -; FLATSCR: scratch_store_dwordx2 +; FLATSCR: scratch_store_short +; FLATSCR: scratch_store_short +; FLATSCR: scratch_store_short +; FLATSCR: scratch_store_short define void @private_store_align2_f64(ptr addrspace(5) %out, double %x) #0 { store double %x, ptr addrspace(5) %out, align 2 ret void