Skip to content

Commit

Permalink
[AMDGPU] Use directive for kernarg preload header padding (llvm#86004)
Browse files Browse the repository at this point in the history
(cherry picked from commit b5b34db)
Change-Id: Ib6d6a4089f5d6601536eeadd1917cf3121e6f4cc
  • Loading branch information
kerbowa authored and bcahoon committed Jun 5, 2024
1 parent 658e51b commit a9c7f77
Show file tree
Hide file tree
Showing 3 changed files with 325 additions and 8,757 deletions.
21 changes: 9 additions & 12 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,15 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
return true;
}

/// Writes the kernarg preload header to the assembly output stream.
///
/// The header is one instruction line followed by a single `.fill` directive.
///
/// \param STI         Subtarget info; not read by this implementation.
/// \param TrapEnabled Selects the first instruction: `s_trap 2` when true,
///                    `s_endpgm` when false.
/// \returns Always true.
bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
    const MCSubtargetInfo &STI, bool TrapEnabled) {
  // Leading instruction, chosen by TrapEnabled.
  if (TrapEnabled)
    OS << "\ts_trap 2";
  else
    OS << "\ts_endpgm";

  // Trailing assembly comment explaining why the header exists.
  OS << " ; Kernarg preload header. Trap with incompatible firmware that "
        "doesn't support preloading kernel arguments.\n";

  // One directive covering 63 four-byte words of 0xbf800000 (annotated in the
  // emitted text as s_nop 0) instead of 63 separate instruction lines.
  OS << "\t.fill 63, 4, 0xbf800000 ; s_nop 0\n";
  return true;
}

bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
const uint32_t Encoded_s_code_end = 0xbf9f0000;
const uint32_t Encoded_s_nop = 0xbf800000;
Expand Down Expand Up @@ -842,18 +851,6 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
return true;
}

/// Writes the kernarg preload header to the assembly output stream as one
/// instruction line followed by 63 individual `s_nop 0` lines.
///
/// \param STI         Subtarget info; not read by this implementation.
/// \param TrapEnabled Selects the first instruction: `s_trap 2` when true,
///                    `s_endpgm` when false.
/// \returns Always true.
bool AMDGPUTargetAsmStreamer::EmitKernargPreloadHeader(
    const MCSubtargetInfo &STI, bool TrapEnabled) {
  // Leading instruction, chosen by TrapEnabled.
  if (TrapEnabled)
    OS << "\ts_trap 2";
  else
    OS << "\ts_endpgm";

  // Trailing assembly comment explaining why the instruction is there.
  OS << " ; Trap with incompatible firmware that doesn't "
        "support preloading kernel arguments.\n";

  // Pad with 63 s_nop 0 instructions, one per output line.
  for (unsigned Remaining = 63; Remaining != 0; --Remaining)
    OS << "\ts_nop 0\n";

  return true;
}

bool AMDGPUTargetELFStreamer::EmitKernargPreloadHeader(
const MCSubtargetInfo &STI, bool TrapEnabled) {
const uint32_t Encoded_s_nop = 0xbf800000;
Expand Down
11 changes: 7 additions & 4 deletions llvm/test/CodeGen/AMDGPU/preload-kernarg-header.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,HSA %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,NON-HSA %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,HSA,ASM %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA,OBJ %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -filetype=obj < %s | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,NON-HSA,OBJ %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -amdgpu-kernarg-preload-count=1 -asm-verbose=0 < %s | llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj | llvm-objdump --arch=amdgcn --mcpu=gfx940 --disassemble - | FileCheck -check-prefixes=GCN,HSA,OBJ %s

; GCN: preload_kernarg_header
; HSA: s_trap 2
; NON-HSA: s_endpgm
; GCN-COUNT-63: s_nop 0
; ASM: .fill 63, 4, 0xbf800000 ; s_nop 0
; OBJ-COUNT-63: s_nop 0
; Test kernel using the amdgpu_kernel calling convention with a single pointer
; argument. The check lines above expect the preload header to be emitted
; for this function.
define amdgpu_kernel void @preload_kernarg_header(ptr %arg) {
; Store the pointer argument to the address it points at, then return.
store ptr %arg, ptr %arg
ret void
}

; GCN: non_kernel_function
; GCN-NOT: s_trap 2
; GCN-NOT: s_nop 0
; GCN: flat_store
define void @non_kernel_function(ptr %arg) {
Expand Down
Loading

0 comments on commit a9c7f77

Please sign in to comment.