-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Remove wavefrontsize feature from GFX10+ #98400
Conversation
Processor definition shall not include a default feature which may be switched off by a different wave size. This allows not to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests.
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-mc Author: Stanislav Mekhanoshin (rampitec) ChangesProcessor definition shall not include a default feature which may be switched off by a different wave size. This allows not to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests. Patch is 71.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98400.diff 10 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 94e8e77b3c052..dfc8eaea66f7b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1464,7 +1464,6 @@ def FeatureISAVersion10_Common : FeatureSet<
FeatureLDSBankCount32,
FeatureDLInsts,
FeatureNSAEncoding,
- FeatureWavefrontSize32,
FeatureBackOffBarrier]>;
def FeatureISAVersion10_1_Common : FeatureSet<
@@ -1548,7 +1547,6 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureDot10Insts,
FeatureNSAEncoding,
FeaturePartialNSAEncoding,
- FeatureWavefrontSize32,
FeatureShaderCyclesRegister,
FeatureArchitectedFlatScratch,
FeatureAtomicFaddRtnInsts,
@@ -1625,7 +1623,6 @@ def FeatureISAVersion12 : FeatureSet<
FeatureDot11Insts,
FeatureNSAEncoding,
FeaturePartialNSAEncoding,
- FeatureWavefrontSize32,
FeatureShaderCyclesHiLoRegisters,
FeatureArchitectedFlatScratch,
FeatureArchitectedSGPRs,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 21fe1bc31a27e..a59893d3cf85d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -105,6 +105,14 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
: AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
+ !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
+ if (getGeneration() >= AMDGPUSubtarget::GFX10)
+ ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ else
+ ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+ }
+
// We don't support FP64 for EG/NI atm.
assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b08957d22ee74..1c3925cfad464 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1408,9 +1408,18 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
copySTI().ToggleFeature("southern-islands");
}
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
+ FeatureBitset FB = getFeatureBits();
+ if (!FB[AMDGPU::FeatureWavefrontSize64] &&
+ !FB[AMDGPU::FeatureWavefrontSize32]) {
+ if (ISA.Major >= 10)
+ copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ else
+ copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+ }
+
setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 695b2f246a778..57d717dd9e634 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -45,10 +45,26 @@ using namespace llvm;
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
+static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
+ !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+ MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
+ if (AMDGPU::isGFX10Plus(STI))
+ STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ else
+ STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+ return STICopy;
+ }
+
+ return STI;
+}
+
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
MCContext &Ctx, MCInstrInfo const *MCII)
- : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
- MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
+ : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
+ MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
+ TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
// ToDo: AMDGPUDisassembler supports only VI ISA.
if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
diff --git a/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll b/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
index c246939811046..95ae8a6adfdf8 100644
--- a/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
@@ -1,5 +1,3 @@
-; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,-wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
-; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,-wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
index 270ab5fee1125..824d3708c027d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s
@@ -10,10 +10,10 @@
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
; GCN-LABEL: {{^}}fold_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(
diff --git a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
index 683ba98e52cf1..9cfba8b2e5c04 100644
--- a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
+++ b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=amdgcn-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
; RUN: llc -mtriple=r600-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s
target datalayout = "A5"
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
index b16caed8b275f..75f20b0c7f0c4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
@@ -12,13 +12,13 @@ v_cmp_class_f16 vcc, vcc_hi, v255
v_cmp_class_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, v127, v255
+v_cmp_class_f16 vcc, v127, v255
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, vcc_hi, v255
+v_cmp_class_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, vcc_lo, v255
+v_cmp_class_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_class_f16_e64
v_cmp_eq_f16 vcc, v1, v255
@@ -33,16 +33,16 @@ v_cmp_eq_f16 vcc, vcc_hi, v255
v_cmp_eq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v1, v255
+v_cmp_eq_f16 vcc, v1, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v127, v255
+v_cmp_eq_f16 vcc, v127, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, vcc_hi, v255
+v_cmp_eq_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, vcc_lo, v255
+v_cmp_eq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_f16_e64
v_cmp_eq_i16 vcc, v1, v255
@@ -57,16 +57,16 @@ v_cmp_eq_i16 vcc, vcc_hi, v255
v_cmp_eq_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v1, v255
+v_cmp_eq_i16 vcc, v1, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v127, v255
+v_cmp_eq_i16 vcc, v127, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, vcc_hi, v255
+v_cmp_eq_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, vcc_lo, v255
+v_cmp_eq_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_i16_e64
v_cmp_eq_u16 vcc, v1, v255
@@ -81,16 +81,16 @@ v_cmp_eq_u16 vcc, vcc_hi, v255
v_cmp_eq_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v1, v255
+v_cmp_eq_u16 vcc, v1, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v127, v255
+v_cmp_eq_u16 vcc, v127, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, vcc_hi, v255
+v_cmp_eq_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, vcc_lo, v255
+v_cmp_eq_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_u16_e64
v_cmp_f_f16 vcc, v1, v255
@@ -105,16 +105,16 @@ v_cmp_f_f16 vcc, vcc_hi, v255
v_cmp_f_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v1, v255
+v_cmp_f_f16 vcc, v1, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v127, v255
+v_cmp_f_f16 vcc, v127, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, vcc_hi, v255
+v_cmp_f_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, vcc_lo, v255
+v_cmp_f_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_f_f16_e64
v_cmp_ge_f16 vcc, v1, v255
@@ -129,16 +129,16 @@ v_cmp_ge_f16 vcc, vcc_hi, v255
v_cmp_ge_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v1, v255
+v_cmp_ge_f16 vcc, v1, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v127, v255
+v_cmp_ge_f16 vcc, v127, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, vcc_hi, v255
+v_cmp_ge_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, vcc_lo, v255
+v_cmp_ge_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_f16_e64
v_cmp_ge_i16 vcc, v1, v255
@@ -153,16 +153,16 @@ v_cmp_ge_i16 vcc, vcc_hi, v255
v_cmp_ge_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v1, v255
+v_cmp_ge_i16 vcc, v1, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v127, v255
+v_cmp_ge_i16 vcc, v127, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, vcc_hi, v255
+v_cmp_ge_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, vcc_lo, v255
+v_cmp_ge_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_i16_e64
v_cmp_ge_u16 vcc, v1, v255
@@ -177,16 +177,16 @@ v_cmp_ge_u16 vcc, vcc_hi, v255
v_cmp_ge_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v1, v255
+v_cmp_ge_u16 vcc, v1, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v127, v255
+v_cmp_ge_u16 vcc, v127, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, vcc_hi, v255
+v_cmp_ge_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, vcc_lo, v255
+v_cmp_ge_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_u16_e64
v_cmp_gt_f16 vcc, v1, v255
@@ -201,16 +201,16 @@ v_cmp_gt_f16 vcc, vcc_hi, v255
v_cmp_gt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v1, v255
+v_cmp_gt_f16 vcc, v1, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v127, v255
+v_cmp_gt_f16 vcc, v127, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, vcc_hi, v255
+v_cmp_gt_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, vcc_lo, v255
+v_cmp_gt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_f16_e64
v_cmp_gt_i16 vcc, v1, v255
@@ -225,16 +225,16 @@ v_cmp_gt_i16 vcc, vcc_hi, v255
v_cmp_gt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v1, v255
+v_cmp_gt_i16 vcc, v1, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v127, v255
+v_cmp_gt_i16 vcc, v127, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, vcc_hi, v255
+v_cmp_gt_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, vcc_lo, v255
+v_cmp_gt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_i16_e64
v_cmp_gt_u16 vcc, v1, v255
@@ -249,16 +249,16 @@ v_cmp_gt_u16 vcc, vcc_hi, v255
v_cmp_gt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v1, v255
+v_cmp_gt_u16 vcc, v1, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v127, v255
+v_cmp_gt_u16 vcc, v127, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, vcc_hi, v255
+v_cmp_gt_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, vcc_lo, v255
+v_cmp_gt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_u16_e64
v_cmp_le_f16 vcc, v1, v255
@@ -273,16 +273,16 @@ v_cmp_le_f16 vcc, vcc_hi, v255
v_cmp_le_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v1, v255
+v_cmp_le_f16 vcc, v1, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v127, v255
+v_cmp_le_f16 vcc, v127, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, vcc_hi, v255
+v_cmp_le_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, vcc_lo, v255
+v_cmp_le_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_f16_e64
v_cmp_le_i16 vcc, v1, v255
@@ -297,16 +297,16 @@ v_cmp_le_i16 vcc, vcc_hi, v255
v_cmp_le_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v1, v255
+v_cmp_le_i16 vcc, v1, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v127, v255
+v_cmp_le_i16 vcc, v127, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, vcc_hi, v255
+v_cmp_le_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, vcc_lo, v255
+v_cmp_le_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_i16_e64
v_cmp_le_u16 vcc, v1, v255
@@ -321,16 +321,16 @@ v_cmp_le_u16 vcc, vcc_hi, v255
v_cmp_le_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v1, v255
+v_cmp_le_u16 vcc, v1, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v127, v255
+v_cmp_le_u16 vcc, v127, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, vcc_hi, v255
+v_cmp_le_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, vcc_lo, v255
+v_cmp_le_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_u16_e64
v_cmp_lg_f16 vcc, v1, v255
@@ -345,16 +345,16 @@ v_cmp_lg_f16 vcc, vcc_hi, v255
v_cmp_lg_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v1, v255
+v_cmp_lg_f16 vcc, v1, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v127, v255
+v_cmp_lg_f16 vcc, v127, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, vcc_hi, v255
+v_cmp_lg_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, vcc_lo, v255
+v_cmp_lg_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lg_f16_e64
v_cmp_lt_f16 vcc, v1, v255
@@ -369,16 +369,16 @@ v_cmp_lt_f16 vcc, vcc_hi, v255
v_cmp_lt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v1, v255
+v_cmp_lt_f16 vcc, v1, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v127, v255
+v_cmp_lt_f16 vcc, v127, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, vcc_hi, v255
+v_cmp_lt_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, vcc_lo, v255
+v_cmp_lt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_f16_e64
v_cmp_lt_i16 vcc, v1, v255
@@ -393,16 +393,16 @@ v_cmp_lt_i16 vcc, vcc_hi, v255
v_cmp_lt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v1, v255
+v_cmp_lt_i16 vcc, v1, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v127, v255
+v_cmp_lt_i16 vcc, v127, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, vcc_hi, v255
+v_cmp_lt_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, vcc_lo, v255
+v_cmp_lt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_i16_e64
v_cmp_lt_u16 vcc, v1, v255
@@ -417,16 +417,16 @@ v_cmp_lt_u16 vcc, vcc_hi, v255
v_cmp_lt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v1, v255
+v_cmp_lt_u16 vcc, v1, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v127, v255
+v_cmp_lt_u16 vcc, v127, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, vcc_hi, v255
+v_cmp_lt_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, vcc_lo, v255
+v_cmp_lt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_u16_e64
v_cmp_ne_i16 vcc, v1, v255
@@ -441,16 +441,16 @@ v_cmp_ne_i16 vcc, vcc_hi, v255
v_cmp_ne_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v1, v255
+v_cmp_ne_i16 vcc, v1, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v127, v255
+v_cmp_ne_i16 vcc, v127, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, vcc_hi, v255
+v_cmp_ne_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, vcc_lo, v255
+v_cmp_ne_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_i16_e64
v_cmp_ne_u16 vcc, v1, v255
@@ -465,16 +465,16 @@ v_cmp_ne_u16 vcc, vcc_hi, v255
v_cmp_ne_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v1, v255
+v_cmp_ne_u16 vcc, v1, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v127, v255
+v_cmp_ne_u16 vcc, v127, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, vcc_hi, v255
+v_cmp_ne_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, vcc_lo, v255
+v_cmp_ne_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_u16_e64
v_cmp_neq_f16 vcc, v1, v255
@@ -489,16 +489,16 @@ v_cmp_neq_f16 vcc, vcc_hi, v255
v_cmp_neq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v1, v255
+v_cmp_neq_f16 vcc, v1, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v127, v255
+v_cmp_neq_f16 vcc, v127, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, vcc_hi, v255
+v_cmp_neq_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, vcc_lo, v255
+v_cmp_neq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_neq_f16_e64
v_cmp_nge_f16 vcc, v1, v255
@@ -513,16 +5...
[truncated]
|
@@ -1,5 +1,3 @@ | |||
; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,-wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This diagnostics is now missing, BE will just initialize wavesize to default. If we want to catch this case we would still need to scan the original string.
I did not remove all these |
Also note that clang and flang do the same thing by calling |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do those clang tests need to be updated as well?
No, FE is unaffected. But let's see what github testers may find. |
if (!hasFeature(AMDGPU::FeatureWavefrontSize32) && | ||
!hasFeature(AMDGPU::FeatureWavefrontSize64)) { | ||
if (getGeneration() >= AMDGPUSubtarget::GFX10) | ||
ToggleFeature(AMDGPU::FeatureWavefrontSize32); | ||
else | ||
ToggleFeature(AMDGPU::FeatureWavefrontSize64); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can avoid this by having a separate SupportsWave32 feature implied by FeatureWavefrontSize32
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We still need to select default somewhere. So say a subtarget has FeatureSupportsWave32, then what?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I.e. the line if (getGeneration() >= AMDGPUSubtarget::GFX10)
can be changed to if (hasFeature(AMDGPU::FeatureSupportsWave32))
, but that's it.
Also I shall mention, this if
can be avoided altogether because subtargets without an option to select wave size just have a nailed wave64 feature in their definition. This if
here is just to catch cases when tests use manual -mattr switching it off, but I can remove it and only set wave32 as a default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just did that, unconditionally add wave32 if none is set.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean if you use the implies feature, the feature parsing logic should flip the incompatible case for you instead of manually doing it here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do not see a way to tell it is incompatible in the first place. OK, wave32 implies it supports wave32. How does it turn wave32 automatically? How does it tell that wave64 is incompatible with wave32?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IMHO to do what you want features must be:
- Organized in groups.
- We need to tell features in group are mutually exclusive.
- There shall be a way to tell this one is the default.
None of that exists.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean if there's a SupportsWave32 feature, implied by FeatureWavefrontSize32, if you specify wavefrontsize32 to a wave64-only target, the incompatible feature check in the parsing logic will hit and it will assume you specified an invalid target and unset the target-cpu.
Older targets just have FeatureWavefrontSize64 in their definitions.
MCContext &Ctx) { | ||
if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) && | ||
!STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) { | ||
MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't really understand this subtargetcopy business
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
STI is const, I cannot just flip the bit. The same is done in the AsmParser with copySTI().
if (!hasFeature(AMDGPU::FeatureWavefrontSize32) && | ||
!hasFeature(AMDGPU::FeatureWavefrontSize64)) { | ||
if (getGeneration() >= AMDGPUSubtarget::GFX10) | ||
ToggleFeature(AMDGPU::FeatureWavefrontSize32); | ||
else | ||
ToggleFeature(AMDGPU::FeatureWavefrontSize64); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean if you use the implies feature, the feature parsing logic should flip the incompatible case for you instead of manually doing it here
Ping |
Summary: Processor definition shall not include a default feature which may be switched off by a different wave size. This allows not to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D59822435
Those are not needed now that <llvm#98400> is submitted.
Summary: Processor definition shall not include a default feature which may be switched off by a different wave size. This allows not to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251504
…100339) Summary: Those are not needed now that <#98400> is submitted. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250755
They are no longer needed after the patch: [AMDGPU] Remove wavefrontsize feature from GFX10: llvm#98400 Note that they may still be needed if "target-features" are set to "+wavefrontsize32" or "+wavefrontsize64".
…ts (NFC) (#100711) They are no longer needed after the patch: [AMDGPU] Remove wavefrontsize feature from GFX10: #98400 The exception is when "target-features" are set to "+wavefrontsize32" or "+wavefrontsize64", we still need to remove a wavefrontsize feature before add a different one to make sure only one of them are present.
…lvm#100339) Those are not needed now that <llvm#98400> is submitted.
This reverts commit b132dd4.
Processor definition shall not include a default feature which may be switched off by a different wave size. This allows not to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests.