[AMDGPU] Generate checks for vector indexing. NFC. (llvm#105668)

Generating the checks allows combining some test files that were split only because adding new RUN lines introduced too much churn in the checks.
Harini0924 · Aug 22, 2024 · c4c5fdd · c4c5fdd
1 parent ec5e585
commit c4c5fdd
Show file tree

Hide file tree

Showing 4 changed files with 8,066 additions and 496 deletions.
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
@@ -2,70 +2,6 @@
 
 ; indexing of vectors.
 
-; Subtest below moved from file test/CodeGen/AMDGPU/indirect-addressing-si.ll
-; to avoid gfx9 scheduling induced issues.
-
-
-; GCN-LABEL: {{^}}insert_vgpr_offset_multiple_in_block:
-; GCN-DAG: s_load_dwordx16 s[[[S_ELT0:[0-9]+]]:[[S_ELT15:[0-9]+]]]
-; GCN-DAG: {{buffer|flat|global}}_load_dword [[IDX0:v[0-9]+]]
-; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62
-
-; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT15:[0-9]+]], s[[S_ELT15]]
-; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]
-
-; GCN: v_cmp_eq_u32_e32
-; GCN-COUNT-32: v_cndmask_b32
-
-; GCN-COUNT-4: buffer_store_dwordx4
-define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %in, <16 x i32> %vec0) #0 {
-entry:
-  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %id.ext = zext i32 %id to i64
-  %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %id.ext
-  %idx0 = load volatile i32, ptr addrspace(1) %gep
-  %idx1 = add i32 %idx0, 1
-  %live.out.val = call i32 asm sideeffect "v_mov_b32 $0, 62", "=v"()
-  %vec1 = insertelement <16 x i32> %vec0, i32 %live.out.val, i32 %idx0
-  %vec2 = insertelement <16 x i32> %vec1, i32 63, i32 %idx1
-  store volatile <16 x i32> %vec2, ptr addrspace(1) %out0
-  %cmp = icmp eq i32 %id, 0
-  br i1 %cmp, label %bb1, label %bb2
-
-bb1:
-  store volatile i32 %live.out.val, ptr addrspace(1) undef
-  br label %bb2
-
-bb2:
-  ret void
-}
-
-; Avoid inserting extra v_mov from copies within the vgpr indexing sequence. The
-; gpr_idx mode switching sequence is expanded late for this reason.
-
-; GCN-LABEL: {{^}}insert_w_offset_multiple_in_block
-
-; GCN: s_set_gpr_idx_on
-; GCN-NEXT: v_mov_b32_e32
-; GCN-NEXT: s_set_gpr_idx_off
-
-; GCN: s_set_gpr_idx_on
-; GCN-NEXT: v_mov_b32_e32
-; GCN-NOT: v_mov_b32_e32
-; GCN-NEXT: s_set_gpr_idx_off
-define amdgpu_kernel void @insert_w_offset_multiple_in_block(ptr addrspace(1) %out1, i32 %in) #0 {
-entry:
-  %add1 = add i32 %in, 1
-  %ins1 = insertelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, float 17.0, i32 %add1
-  %add2 = add i32 %in, 2
-  %ins2 = insertelement <16 x float> %ins1, float 17.0, i32 %add2
-  store <16 x float> %ins1, ptr addrspace(1) %out1
-  %out2 = getelementptr <16 x float>, ptr addrspace(1) %out1, i32 1
-  store <16 x float> %ins2, ptr addrspace(1) %out2
-
-  ret void
-}
-
 declare hidden void @foo()
 
 ; For functions with calls, we were not accounting for m0_lo16/m0_hi16
@@ -83,7 +19,4 @@ define amdgpu_kernel void @insertelement_with_call(ptr addrspace(1) %ptr, i32 %i
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare void @llvm.amdgcn.s.barrier() #2
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll