-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
MCExpr-ify amd_kernel_code_t #91587
MCExpr-ify amd_kernel_code_t #91587
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Janek van Oirschot (JanekvO) ChangesWraps an AMDGPUMCKernelCodeT struct over the existing amd_kernel_code_t struct with MCExprs for members that would be derived from SIProgramInfo MCExpr members. Patch is 50.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91587.diff 11 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index de81904143b7b..8343d3d83d22e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -19,10 +19,10 @@
#include "AMDGPU.h"
#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPUResourceUsageAnalysis.h"
-#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
+#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "R600AsmPrinter.h"
@@ -205,8 +205,9 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (STM.isMesaKernel(F) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
- amd_kernel_code_t KernelCode;
+ MCKernelCodeT KernelCode;
getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
+ KernelCode.validate(&STM, MF->getContext());
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
}
@@ -1320,7 +1321,7 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
}
}
-void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
+void AMDGPUAsmPrinter::getAmdKernelCode(MCKernelCodeT &Out,
const SIProgramInfo &CurrentProgramInfo,
const MachineFunction &MF) const {
const Function &F = MF.getFunction();
@@ -1331,59 +1332,62 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
MCContext &Ctx = MF.getContext();
- AMDGPU::initDefaultAMDKernelCodeT(Out, &STM);
+ AMDGPU::initDefaultAMDKernelCodeT(Out.KernelCode, &STM);
- Out.compute_pgm_resource_registers =
- CurrentProgramInfo.getComputePGMRSrc1(STM) |
- (CurrentProgramInfo.getComputePGMRSrc2() << 32);
- Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
+ Out.compute_pgm_resource1_registers =
+ CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
+ Out.compute_pgm_resource2_registers =
+ CurrentProgramInfo.getComputePGMRSrc2(Ctx);
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
- if (getMCExprValue(CurrentProgramInfo.DynamicCallStack, Ctx))
- Out.code_properties |= AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK;
+ {
+ const MCExpr *Shift = MCConstantExpr::create(AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx);
+ Out.is_dynamic_callstack = MCBinaryExpr::createShl(
+ CurrentProgramInfo.DynamicCallStack, Shift, Ctx);
+ }
- AMD_HSA_BITS_SET(Out.code_properties,
+ AMD_HSA_BITS_SET(Out.KernelCode.code_properties,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));
const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
- Out.code_properties |=
+ Out.KernelCode.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
if (UserSGPRInfo.hasDispatchPtr())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (UserSGPRInfo.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
if (UserSGPRInfo.hasKernargSegmentPtr())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
if (UserSGPRInfo.hasDispatchID())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
if (UserSGPRInfo.hasFlatScratchInit())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
if (UserSGPRInfo.hasDispatchPtr())
- Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.isXNACKEnabled())
- Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
+ Out.KernelCode.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
Align MaxKernArgAlign;
- Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
- Out.wavefront_sgpr_count = getMCExprValue(CurrentProgramInfo.NumSGPR, Ctx);
- Out.workitem_vgpr_count = getMCExprValue(CurrentProgramInfo.NumVGPR, Ctx);
- Out.workitem_private_segment_byte_size =
- getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx);
- Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
+ Out.KernelCode.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
+ Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
+ Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
+ Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
+ Out.KernelCode.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;
// kernarg_segment_alignment is specified as log of the alignment.
// The minimum alignment is 16.
// FIXME: The metadata treats the minimum as 4?
- Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
+ Out.KernelCode.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 16d8952a533ef..c5abbd3c8c084 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -17,8 +17,6 @@
#include "SIProgramInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
-struct amd_kernel_code_t;
-
namespace llvm {
class AMDGPUMachineFunction;
@@ -29,6 +27,7 @@ class MCOperand;
namespace AMDGPU {
struct MCKernelDescriptor;
+struct MCKernelCodeT;
namespace HSAMD {
class MetadataStreamer;
}
@@ -50,7 +49,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
- void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
+ void getAmdKernelCode(AMDGPU::MCKernelCodeT &Out, const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
/// Emit register usage information so that the GPU driver
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index d47a5f8ebb815..b8bdf816a9932 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,7 @@
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCExpr.h"
+#include "MCTargetDesc/AMDGPUMCKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
@@ -1340,7 +1341,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ParseDirectiveAMDGCNTarget();
bool ParseDirectiveAMDHSACodeObjectVersion();
bool ParseDirectiveAMDHSAKernel();
- bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
+ bool ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &Header);
bool ParseDirectiveAMDKernelCodeT();
// TODO: Possibly make subtargetHasRegister const.
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
@@ -5872,8 +5873,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
return false;
}
-bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
- amd_kernel_code_t &Header) {
+bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, MCKernelCodeT &C) {
// max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
// assembly for backwards compatibility.
if (ID == "max_scratch_backing_memory_byte_size") {
@@ -5883,25 +5883,14 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
SmallString<40> ErrStr;
raw_svector_ostream Err(ErrStr);
- if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
+ if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
return TokError(Err.str());
}
Lex();
- if (ID == "enable_dx10_clamp") {
- if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
- isGFX12Plus())
- return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
- }
-
- if (ID == "enable_ieee_mode") {
- if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
- isGFX12Plus())
- return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
- }
-
if (ID == "enable_wavefront_size32") {
- if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
+ if (C.KernelCode.code_properties &
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
if (!isGFX10Plus())
return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
@@ -5913,41 +5902,23 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}
if (ID == "wavefront_size") {
- if (Header.wavefront_size == 5) {
+ if (C.KernelCode.wavefront_size == 5) {
if (!isGFX10Plus())
return TokError("wavefront_size=5 is only allowed on GFX10+");
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
return TokError("wavefront_size=5 requires +WavefrontSize32");
- } else if (Header.wavefront_size == 6) {
+ } else if (C.KernelCode.wavefront_size == 6) {
if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
return TokError("wavefront_size=6 requires +WavefrontSize64");
}
}
- if (ID == "enable_wgp_mode") {
- if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
- !isGFX10Plus())
- return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
- }
-
- if (ID == "enable_mem_ordered") {
- if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
- !isGFX10Plus())
- return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
- }
-
- if (ID == "enable_fwd_progress") {
- if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
- !isGFX10Plus())
- return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
- }
-
return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
- amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
+ MCKernelCodeT KernelCode;
+ KernelCode.initDefault(&getSTI(), getContext());
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
@@ -5961,11 +5932,12 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
if (ID == ".end_amd_kernel_code_t")
break;
- if (ParseAMDKernelCodeTValue(ID, Header))
+ if (ParseAMDKernelCodeTValue(ID, KernelCode))
return true;
}
- getTargetStreamer().EmitAMDKernelCodeT(Header);
+ KernelCode.validate(&getSTI(), getContext());
+ getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
new file mode 100644
index 0000000000000..7c081d98dadbf
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCKernelCodeT.cpp
@@ -0,0 +1,549 @@
+//===--- AMDHSAKernelCodeT.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCKernelCodeT.h"
+#include "AMDKernelCodeT.h"
+#include "SIDefines.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::AMDGPU;
+
+// Generates the following for MCKernelCodeT struct members:
+// - HasMemberXXXXX class
+// A check to see if MCKernelCodeT has a specific member so it can determine
+// which of the original amd_kernel_code_t members are duplicated (if the
+// names don't match, the table driven strategy won't work).
+// - GetMemberXXXXX class
+// A retrieval helper for said member (of type const MCExpr *&). Will return
+// a `Phony` const MCExpr * initialized to nullptr to preserve reference
+// returns.
+#define GEN_HAS_MEMBER(member) \
+ class HasMember##member { \
+ private: \
+ struct KnownWithMember { \
+ int member; \
+ }; \
+ class AmbiguousDerived : public MCKernelCodeT, public KnownWithMember {}; \
+ template <typename U> \
+ static constexpr std::false_type Test(decltype(U::member) *); \
+ template <typename U> static constexpr std::true_type Test(...); \
+ \
+ public: \
+ static constexpr bool RESULT = \
+ std::is_same_v<decltype(Test<AmbiguousDerived>(nullptr)), \
+ std::true_type>; \
+ }; \
+ class GetMember##member { \
+ public: \
+ static const MCExpr *Phony; \
+ template <typename U, typename std::enable_if_t<HasMember##member::RESULT, \
+ U> * = nullptr> \
+ static const MCExpr *&Get(U &C) { \
+ assert(HasMember##member::RESULT && \
+ "Trying to retrieve member that does not exist."); \
+ return C.member; \
+ } \
+ template <typename U, typename std::enable_if_t< \
+ !HasMember##member::RESULT, U> * = nullptr> \
+ static const MCExpr *&Get(U &C) { \
+ return Phony; \
+ } \
+ }; \
+ const MCExpr *GetMember##member::Phony = nullptr;
+
+// Cannot generate class declarations using the table driver approach (see table
+// in AMDKernelCodeTInfo.h). Luckily, if any are missing here or eventually
+// added to the table, an error should occur when trying to retrieve the table
+// in getMCExprIndexTable.
+GEN_HAS_MEMBER(amd_code_version_major)
+GEN_HAS_MEMBER(amd_code_version_minor)
+GEN_HAS_MEMBER(amd_machine_kind)
+GEN_HAS_MEMBER(amd_machine_version_major)
+GEN_HAS_MEMBER(amd_machine_version_minor)
+GEN_HAS_MEMBER(amd_machine_version_stepping)
+
+GEN_HAS_MEMBER(kernel_code_entry_byte_offset)
+GEN_HAS_MEMBER(kernel_code_prefetch_byte_size)
+
+GEN_HAS_MEMBER(granulated_workitem_vgpr_count)
+GEN_HAS_MEMBER(granulated_wavefront_sgpr_count)
+GEN_HAS_MEMBER(priority)
+GEN_HAS_MEMBER(float_mode)
+GEN_HAS_MEMBER(priv)
+GEN_HAS_MEMBER(enable_dx10_clamp)
+GEN_HAS_MEMBER(debug_mode)
+GEN_HAS_MEMBER(enable_ieee_mode)
+GEN_HAS_MEMBER(enable_wgp_mode)
+GEN_HAS_MEMBER(enable_mem_ordered)
+GEN_HAS_MEMBER(enable_fwd_progress)
+
+GEN_HAS_MEMBER(enable_sgpr_private_segment_wave_byte_offset)
+GEN_HAS_MEMBER(user_sgpr_count)
+GEN_HAS_MEMBER(enable_trap_handler)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_x)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_y)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_id_z)
+GEN_HAS_MEMBER(enable_sgpr_workgroup_info)
+GEN_HAS_MEMBER(enable_vgpr_workitem_id)
+GEN_HAS_MEMBER(enable_exception_msb)
+GEN_HAS_MEMBER(granulated_lds_size)
+GEN_HAS_MEMBER(enable_exception)
+
+GEN_HAS_MEMBER(enable_sgpr_private_segment_buffer)
+GEN_HAS_MEMBER(enable_sgpr_dispatch_ptr)
+GEN_HAS_MEMBER(enable_sgpr_queue_ptr)
+GEN_HAS_MEMBER(enable_sgpr_kernarg_segment_ptr)
+GEN_HAS_MEMBER(enable_sgpr_dispatch_id)
+GEN_HAS_MEMBER(enable_sgpr_flat_scratch_init)
+GEN_HAS_MEMBER(enable_sgpr_private_segment_size)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_x)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_y)
+GEN_HAS_MEMBER(enable_sgpr_grid_workgroup_count_z)
+GEN_HAS_MEMBER(enable_wavefront_size32)
+GEN_HAS_MEMBER(enable_ordered_append_gds)
+GEN_HAS_MEMBER(private_element_size)
+GEN_HAS_MEMBER(is_ptr64)
+GEN_HAS_MEMBER(is_dynamic_callstack)
+GEN_HAS_MEMBER(is_debug_enabled)
+GEN_HAS_MEMBER(is_xnack_enabled)
+
+GEN_HAS_MEMBER(workitem_private_segment_byte_size)
+GEN_HAS_MEMBER(workgroup_group_segment_byte_size)
+GEN_HAS_MEMBER(gds_segment_byte_size)
+GEN_HAS_MEMBER(kernarg_segment_byte_size)
+GEN_HAS_MEMBER(workgroup_fbarrier_count)
+GEN_HAS_MEMBER(wavefront_sgpr_count)
+GEN_HAS_MEMBER(workitem_vgpr_count)
+GEN_HAS_MEMBER(reserved_vgpr_first)
+GEN_HAS_MEMBER(reserved_vgpr_count)
+GEN_HAS_MEMBER(reserved_sgpr_first)
+GEN_HAS_MEMBER(reserved_sgpr_count)
+GEN_HAS_MEMBER(debug_wavefront_private_segment_offset_sgpr)
+GEN_HAS_MEMBER(debug_private_segment_buffer_sgpr)
+GEN_HAS_MEMBER(kernarg_segment_alignment)
+GEN_HAS_MEMBER(group_segment_alignment)
+GEN_HAS_MEMBER(private_segment_alignment)
+GEN_HAS_MEMBER(wavefront_size)
+GEN_HAS_MEMBER(call_convention)
+GEN_HAS_MEMBER(runtime_loader_kernel_symbol)
+
+static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
+ static StringRef const Table[] = {
+ "", // not found placeholder
+#define RECORD(name, altName, print, parse) #name
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
+ static StringRef const Table[] = {
+ "", // not found placeholder
+#define RECORD(name, altName, print, parse) #altName
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+static ArrayRef<bool> hasMCExprVersionTable() {
+ static bool const Table[] = {
+#define RECORD(name, altName, print, parse) (HasMember##name::RESULT)
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+static ArrayRef<std::reference_wrapper<const MCExpr *>>
+getMCExprIndexTable(MCKernelCodeT &C) {
+ static std::reference_wrapper<const MCExpr *> Table[] = {
+#define RECORD(name, altName, print, parse) GetMember##name::Get(C)
+#include "Utils/AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return ArrayRef(Table);
+}
+
+static StringMap<int> createIndexMap(const ArrayRef<StringRef> &names,
+ const ArrayRef<StringRef> &altNames) {
+ StringMap<int> map;
+ assert(names.size() == altNames.size());
+ for (unsigned i = 0; i < names.size(); ++i) {
+ map.insert(std::pair(names[i], i));
+ map.insert(std::pair(altNames[i], i));
+ }
...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
return Parser ? Parser(*this, MCParser, Err) : false; | ||
} | ||
|
||
void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, const char *tab, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
make tab a StringRef? I also thought that there was something for indenting in formatted_raw_ostream
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did find a indenting call but it seems to only allow spaces so instead I've just added the tab indents in the method itself for now.
Also removing |
@@ -20,6 +21,7 @@ add_llvm_component_library(LLVMAMDGPUDesc | |||
CodeGenTypes | |||
Core | |||
MC | |||
MCParser |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Questioning whether it's desired to have MCParser
within the AMDGPUDesc
. Initially seemed suitable to have AMDGPUMCKernelCodeT
together with AMDGPUMCKernelDescriptor
as they are both consumers of SIProgramInfo
and serve a similar metadata emit purpose but it may be desired to have AMDGPUMCKernelCodeT
elsewhere. Let me know if anybody objects to the MCParser
and/or would prefer AMDGPUMCKernelCodeT
to live elsewhere.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see why they would need to be here. The actual definition can be separate from the parsing
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved the MCTargetDesc/AMDGPUMCKernelCodeT.*
files to the original Utils/AMDKernelCodeTUtils.*
files and location. Should now adhere to the same access to parsing as prior this change.
@@ -20,6 +21,7 @@ add_llvm_component_library(LLVMAMDGPUDesc | |||
CodeGenTypes | |||
Core | |||
MC | |||
MCParser |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see why they would need to be here. The actual definition can be separate from the parsing
…rite all members (and change table driven strategy to conform that)
@@ -1229,7 +1229,13 @@ void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, | |||
KernelCode.amd_machine_version_minor = Version.Minor; | |||
KernelCode.amd_machine_version_stepping = Version.Stepping; | |||
KernelCode.kernel_code_entry_byte_offset = sizeof(amd_kernel_code_t); | |||
KernelCode.wavefront_size = 6; | |||
if (Version.Major >= 10 && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This probably doesn't need the version check, if you do it's hiding another bug somewhere. I guess this can be a separate change since it's already doing it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I should add that this is explicitly checked for in the parser:
llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Lines 5905 to 5908 in 82c5d35
if (ID == "wavefront_size") { | |
if (Header.wavefront_size == 5) { | |
if (!isGFX10Plus()) | |
return TokError("wavefront_size=5 is only allowed on GFX10+"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, wave32 is invalid pre-gfx10. It shouldn't have been valid to set wave32 on the wave64 only target in the first place
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I misunderstood the initial comment. I've removed the version check (although, do let me know if you'd like it to be a separate change after all).
Wraps an AMDGPUMCKernelCodeT struct over the existing amd_kernel_code_t struct with MCExprs for members that would be derived from SIProgramInfo MCExpr members.