Skip to content

Commit

Permalink
fixup! fixup! [AMDGPU] Enable unaligned scratch accesses
Browse files Browse the repository at this point in the history
add FeatureUnalignedScratchAccess to gfx12
  • Loading branch information
ritter-x2a committed Oct 7, 2024
1 parent 0d0c4e0 commit 0e137e4
Show file tree
Hide file tree
Showing 4 changed files with 230 additions and 2,292 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1239,9 +1239,9 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
FeatureVOP3Literal, FeatureDPP8,
FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
FeatureA16, FeatureFastDenormalF32, FeatureG16,
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast,
FeatureMaxHardClauseLength32,
FeatureUnalignedBufferAccess, FeatureUnalignedScratchAccess,
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
]
Expand Down
167 changes: 21 additions & 146 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2432,39 +2432,12 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_mov_b32 v2, 0
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:4 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS
; GFX12-NEXT: v_mov_b32_e32 v1, 15
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS
; GFX12-NEXT: scratch_store_b64 v0, v[1:2], off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v0, v0, off offset:7 scope:SCOPE_SYS
; GFX12-NEXT: scratch_load_b64 v[0:1], v0, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
Expand Down Expand Up @@ -2782,57 +2755,16 @@ define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 0
; GFX12-NEXT: v_mov_b32_e32 v3, 2
; GFX12-NEXT: s_mov_b32 s2, 3
; GFX12-NEXT: s_mov_b32 s1, 2
; GFX12-NEXT: s_mov_b32 s0, 1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v3, s2 :: v_dual_mov_b32 v2, s1
; GFX12-NEXT: v_mov_b32_e32 v1, s0
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS
; GFX12-NEXT: scratch_store_b96 v0, v[1:3], off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v1, 3
; GFX12-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v1, off offset:8 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:9 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:10 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:11 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:7 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:8 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:9 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:10 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v0, v0, off offset:11 scope:SCOPE_SYS
; GFX12-NEXT: scratch_load_b96 v[0:2], v0, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
Expand Down Expand Up @@ -3267,74 +3199,17 @@ define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) {
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 0
; GFX12-NEXT: v_mov_b32_e32 v3, 2
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v1, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:1 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:2 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:3 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v3, off offset:4 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v1, 3
; GFX12-NEXT: v_mov_b32_e32 v3, 4
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:5 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:6 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:7 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v1, off offset:8 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:9 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:10 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:11 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v3, off offset:12 scope:SCOPE_SYS
; GFX12-NEXT: s_mov_b32 s3, 4
; GFX12-NEXT: s_mov_b32 s2, 3
; GFX12-NEXT: s_mov_b32 s1, 2
; GFX12-NEXT: s_mov_b32 s0, 1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2
; GFX12-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:13 scope:SCOPE_SYS
; GFX12-NEXT: scratch_store_b128 v0, v[1:4], off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:14 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_store_b8 v0, v2, off offset:15 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:1 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:2 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:3 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:4 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:5 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:6 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:7 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:8 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:9 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:10 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:11 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:12 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:13 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v1, v0, off offset:14 scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: scratch_load_u8 v0, v0, off offset:15 scope:SCOPE_SYS
; GFX12-NEXT: scratch_load_b128 v[0:3], v0, off scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
Expand Down
Loading

0 comments on commit 0e137e4

Please sign in to comment.