Skip to content

Commit

Permalink
MCExpr-ify amd_kernel_code_t (#91587)
Browse files Browse the repository at this point in the history
Redefines the amd_kernel_code_t struct with MCExprs for members that would be
derived from SIProgramInfo MCExpr members.
  • Loading branch information
JanekvO authored May 22, 2024
1 parent cdcd653 commit a699ccb
Show file tree
Hide file tree
Showing 14 changed files with 831 additions and 179 deletions.
32 changes: 15 additions & 17 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include "AMDGPU.h"
#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
Expand All @@ -29,6 +28,7 @@
#include "SIMachineFunctionInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
Expand Down Expand Up @@ -205,8 +205,9 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (STM.isMesaKernel(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
amd_kernel_code_t KernelCode;
AMDGPUMCKernelCodeT KernelCode;
getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
KernelCode.validate(&STM, MF->getContext());
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
}

Expand Down Expand Up @@ -1317,7 +1318,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
}
}

void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
const SIProgramInfo &CurrentProgramInfo,
const MachineFunction &MF) const {
const Function &F = MF.getFunction();
Expand All @@ -1328,24 +1329,22 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
MCContext &Ctx = MF.getContext();

AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
Out.initDefault(&STM, Ctx, /*InitMCExpr=*/false);

Out.compute_pgm_resource_registers =
CurrentProgramInfo.getComputePGMRSrc1(STM) |
(CurrentProgramInfo.getComputePGMRSrc2() << 32);
Out.compute_pgm_resource1_registers =
CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
Out.compute_pgm_resource2_registers =
CurrentProgramInfo.getComputePGMRSrc2(Ctx);
Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;

if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, Ctx))
Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
Out.is_dynamic_callstack = CurrentProgramInfo.DynamicCallStack;

AMD_HSA_BITS_SET(Out.code_properties,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
AMD_HSA_BITS_SET(Out.code_properties, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));

const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}

if (UserSGPRInfo.hasDispatchPtr())
Expand All @@ -1371,10 +1370,9 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,

Align MaxKernArgAlign;
Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
Out.wavefront_sgpr_count = getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx);
Out.workitem_vgpr_count = getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx);
Out.workitem_private_segment_byte_size =
getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx);
Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;

// kernarg_segment_alignment is specified as log of the alignment.
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
#include "SIProgramInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"

struct amd_kernel_code_t;

namespace llvm {

class AMDGPUMachineFunction;
Expand All @@ -29,6 +27,7 @@ class MCOperand;

namespace AMDGPU {
struct MCKernelDescriptor;
struct AMDGPUMCKernelCodeT;
namespace HSAMD {
class MetadataStreamer;
}
Expand All @@ -50,7 +49,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;

void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
void getAmdKernelCode(AMDGPU::AMDGPUMCKernelCodeT &Out,
const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;

/// Emit register usage information so that the GPU driver
Expand Down
51 changes: 11 additions & 40 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1340,7 +1340,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSACodeObjectVersion();
bool ParseDirectiveAMDHSAKernel();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
bool ParseDirectiveAMDKernelCodeT();
// TODO: Possibly make subtargetHasRegister const.
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
Expand Down Expand Up @@ -5863,7 +5863,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
}

bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
amd_kernel_code_t &Header) {
AMDGPUMCKernelCodeT &C) {
// max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
// assembly for backwards compatibility.
if (ID == "max_scratch_backing_memory_byte_size") {
Expand All @@ -5873,25 +5873,13 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,

SmallString<40> ErrStr;
raw_svector_ostream Err(ErrStr);
if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
return TokError(Err.str());
}
Lex();

if (ID == "enable_dx10_clamp") {
if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
isGFX12Plus())
return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
}

if (ID == "enable_ieee_mode") {
if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
isGFX12Plus())
return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
}

if (ID == "enable_wavefront_size32") {
if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
Expand All @@ -5903,41 +5891,23 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}

if (ID == "wavefront_size") {
if (Header.wavefront_size == 5) {
if (C.wavefront_size == 5) {
if (!isGFX10Plus())
return TokError("wavefront_size=5 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
return TokError("wavefront_size=5 requires +WavefrontSize32");
} else if (Header.wavefront_size == 6) {
} else if (C.wavefront_size == 6) {
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
return TokError("wavefront_size=6 requires +WavefrontSize64");
}
}

if (ID == "enable_wgp_mode") {
if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
!isGFX10Plus())
return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
}

if (ID == "enable_mem_ordered") {
if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
!isGFX10Plus())
return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
}

if (ID == "enable_fwd_progress") {
if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
!isGFX10Plus())
return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
}

return false;
}

bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
AMDGPUMCKernelCodeT KernelCode;
KernelCode.initDefault(&getSTI(), getContext());

while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
Expand All @@ -5951,11 +5921,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
if (ID == ".end_amd_kernel_code_t")
break;

if (ParseAMDKernelCodeTValue(ID, Header))
if (ParseAMDKernelCodeTValue(ID, KernelCode))
return true;
}

getTargetStreamer().EmitAMDKernelCodeT(Header);
KernelCode.validate(&getSTI(), getContext());
getTargetStreamer().EmitAMDKernelCodeT(KernelCode);

return false;
}
Expand Down
12 changes: 4 additions & 8 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#include "AMDGPUTargetStreamer.h"
#include "AMDGPUMCKernelDescriptor.h"
#include "AMDGPUPTNote.h"
#include "AMDKernelCodeT.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
Expand Down Expand Up @@ -240,10 +239,9 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
OS << "\t.amdhsa_code_object_version " << COV << '\n';
}

void
AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
OS << "\t.amd_kernel_code_t\n";
dumpAmdKernelCode(&Header, OS, "\t\t");
Header.EmitKernelCodeT(OS, getContext());
OS << "\t.end_amd_kernel_code_t\n";
}

Expand Down Expand Up @@ -789,12 +787,10 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {

void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}

void
AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {

void AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
MCStreamer &OS = getStreamer();
OS.pushSection();
OS.emitBytes(StringRef((const char*)&Header, sizeof(Header)));
Header.EmitKernelCodeT(OS, getContext());
OS.popSection();
}

Expand Down
9 changes: 4 additions & 5 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#include "Utils/AMDGPUPALMetadata.h"
#include "llvm/MC/MCStreamer.h"

struct amd_kernel_code_t;

namespace llvm {

class MCELFStreamer;
Expand All @@ -23,6 +21,7 @@ class formatted_raw_ostream;

namespace AMDGPU {

struct AMDGPUMCKernelCodeT;
struct MCKernelDescriptor;
namespace HSAMD {
struct Metadata;
Expand Down Expand Up @@ -54,7 +53,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer {
CodeObjectVersion = COV;
}

virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header){};
virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) {};

virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type){};

Expand Down Expand Up @@ -130,7 +129,7 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {

void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV) override;

void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;

void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;

Expand Down Expand Up @@ -186,7 +185,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {

void EmitDirectiveAMDGCNTarget() override;

void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header) override;

void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -1111,7 +1111,7 @@ enum Type { TRAP = -2, WORKGROUP = -1 };
#define C_00B84C_LDS_SIZE 0xFF007FFF
#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
#define C_00B84C_EXCP_EN
#define C_00B84C_EXCP_EN 0x80FFFFFF

#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0
Expand Down
45 changes: 22 additions & 23 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
Expand Down Expand Up @@ -1218,39 +1219,37 @@ unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI,
}
} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode,
const MCSubtargetInfo *STI) {
IsaVersion Version = getIsaVersion(STI->getCPU());

memset(&Header, 0, sizeof(Header));

Header.amd_kernel_code_version_major = 1;
Header.amd_kernel_code_version_minor = 2;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
Header.amd_machine_version_major = Version.Major;
Header.amd_machine_version_minor = Version.Minor;
Header.amd_machine_version_stepping = Version.Stepping;
Header.kernel_code_entry_byte_offset = sizeof(Header);
Header.wavefront_size = 6;
KernelCode.amd_kernel_code_version_major = 1;
KernelCode.amd_kernel_code_version_minor = 2;
KernelCode.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
KernelCode.amd_machine_version_major = Version.Major;
KernelCode.amd_machine_version_minor = Version.Minor;
KernelCode.amd_machine_version_stepping = Version.Stepping;
KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t);
if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
KernelCode.wavefront_size = 5;
KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
} else {
KernelCode.wavefront_size = 6;
}

// If the code object does not support indirect functions, then the value must
// be 0xffffffff.
Header.call_convention = -1;
KernelCode.call_convention = -1;

// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
Header.kernarg_segment_alignment = 4;
Header.group_segment_alignment = 4;
Header.private_segment_alignment = 4;
KernelCode.kernarg_segment_alignment = 4;
KernelCode.group_segment_alignment = 4;
KernelCode.private_segment_alignment = 4;

if (Version.Major >= 10) {
if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
Header.wavefront_size = 5;
Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
}
Header.compute_pgm_resource_registers |=
S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
S_00B848_MEM_ORDERED(1);
KernelCode.compute_pgm_resource_registers |=
S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
S_00B848_MEM_ORDERED(1);
}
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class raw_ostream;

namespace AMDGPU {

struct AMDGPUMCKernelCodeT;
struct IsaVersion;

/// Generic target versions emitted by this version of LLVM.
Expand Down Expand Up @@ -860,7 +861,7 @@ unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
LLVM_READONLY
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &Header,
const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
Expand Down
Loading

0 comments on commit a699ccb

Please sign in to comment.