[AMDGPU] Remove wavefrontsize feature from GFX10+ #98400

rampitec · 2024-07-10T21:58:05Z

A processor definition should not include a default wavefront-size feature that has to be switched off when a different wave size is selected. This makes it unnecessary to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests.

llvmbot · 2024-07-10T21:58:34Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Author: Stanislav Mekhanoshin (rampitec)

Changes

A processor definition should not include a default wavefront-size feature that has to be switched off when a different wave size is selected. This makes it unnecessary to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests.

Patch is 71.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98400.diff

10 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPU.td (-3)
(modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (+8)
(modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+10-1)
(modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+18-2)
(modified) llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll (-2)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll (+8-8)
(modified) llvm/test/CodeGen/AMDGPU/unknown-processor.ll (+1-1)
(modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s (+327-327)
(modified) llvm/test/MC/AMDGPU/wave32.s (+4-4)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx10-wave32.txt (+2-2)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 94e8e77b3c052..dfc8eaea66f7b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1464,7 +1464,6 @@ def FeatureISAVersion10_Common : FeatureSet<
    FeatureLDSBankCount32,
    FeatureDLInsts,
    FeatureNSAEncoding,
-   FeatureWavefrontSize32,
    FeatureBackOffBarrier]>;
 
 def FeatureISAVersion10_1_Common : FeatureSet<
@@ -1548,7 +1547,6 @@ def FeatureISAVersion11_Common : FeatureSet<
    FeatureDot10Insts,
    FeatureNSAEncoding,
    FeaturePartialNSAEncoding,
-   FeatureWavefrontSize32,
    FeatureShaderCyclesRegister,
    FeatureArchitectedFlatScratch,
    FeatureAtomicFaddRtnInsts,
@@ -1625,7 +1623,6 @@ def FeatureISAVersion12 : FeatureSet<
    FeatureDot11Insts,
    FeatureNSAEncoding,
    FeaturePartialNSAEncoding,
-   FeatureWavefrontSize32,
    FeatureShaderCyclesHiLoRegisters,
    FeatureArchitectedFlatScratch,
    FeatureArchitectedSGPRs,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 21fe1bc31a27e..a59893d3cf85d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -105,6 +105,14 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
                                         : AMDGPUSubtarget::SOUTHERN_ISLANDS;
   }
 
+  if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
+      !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
+    if (getGeneration() >= AMDGPUSubtarget::GFX10)
+      ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+    else
+      ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+  }
+
   // We don't support FP64 for EG/NI atm.
   assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
 
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b08957d22ee74..1c3925cfad464 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1408,9 +1408,18 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
       copySTI().ToggleFeature("southern-islands");
     }
 
+    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
+    FeatureBitset FB = getFeatureBits();
+    if (!FB[AMDGPU::FeatureWavefrontSize64] &&
+        !FB[AMDGPU::FeatureWavefrontSize32]) {
+      if (ISA.Major >= 10)
+        copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+      else
+        copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+    }
+
     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
 
-    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
     if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
       createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
       createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 695b2f246a778..57d717dd9e634 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -45,10 +45,26 @@ using namespace llvm;
 
 using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
 
+static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
+                                                 MCContext &Ctx) {
+  if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
+      !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+    MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
+    if (AMDGPU::isGFX10Plus(STI))
+      STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+    else
+      STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+    return STICopy;
+  }
+
+  return STI;
+}
+
 AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
                                        MCContext &Ctx, MCInstrInfo const *MCII)
-    : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
-      MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
+    : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
+      MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
+      TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
       CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
   // ToDo: AMDGPUDisassembler supports only VI ISA.
   if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
diff --git a/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll b/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
index c246939811046..95ae8a6adfdf8 100644
--- a/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
@@ -1,5 +1,3 @@
-; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,-wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
-; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,-wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
 ; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
 ; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
index 270ab5fee1125..824d3708c027d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
 
 ; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s
 ; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s
@@ -10,10 +10,10 @@
 ; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
 ; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
 ; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
 
 ; GCN-LABEL: {{^}}fold_wavefrontsize:
 ; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(
diff --git a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
index 683ba98e52cf1..9cfba8b2e5c04 100644
--- a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
+++ b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=amdgcn-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
 ; RUN: llc -mtriple=r600-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s
 target datalayout = "A5"
 
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
index b16caed8b275f..75f20b0c7f0c4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
@@ -12,13 +12,13 @@ v_cmp_class_f16 vcc, vcc_hi, v255
 v_cmp_class_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_class_f16_e64
 
-v_cmp_class_f16 vcc_lo, v127, v255
+v_cmp_class_f16 vcc, v127, v255
 // GFX11: v_cmp_class_f16_e64
 
-v_cmp_class_f16 vcc_lo, vcc_hi, v255
+v_cmp_class_f16 vcc, vcc_hi, v255
 // GFX11: v_cmp_class_f16_e64
 
-v_cmp_class_f16 vcc_lo, vcc_lo, v255
+v_cmp_class_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_class_f16_e64
 
 v_cmp_eq_f16 vcc, v1, v255
@@ -33,16 +33,16 @@ v_cmp_eq_f16 vcc, vcc_hi, v255
 v_cmp_eq_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_eq_f16_e64
 
-v_cmp_eq_f16 vcc_lo, v1, v255
+v_cmp_eq_f16 vcc, v1, v255
 // GFX11: v_cmp_eq_f16_e64
 
-v_cmp_eq_f16 vcc_lo, v127, v255
+v_cmp_eq_f16 vcc, v127, v255
 // GFX11: v_cmp_eq_f16_e64
 
-v_cmp_eq_f16 vcc_lo, vcc_hi, v255
+v_cmp_eq_f16 vcc, vcc_hi, v255
 // GFX11: v_cmp_eq_f16_e64
 
-v_cmp_eq_f16 vcc_lo, vcc_lo, v255
+v_cmp_eq_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_eq_f16_e64
 
 v_cmp_eq_i16 vcc, v1, v255
@@ -57,16 +57,16 @@ v_cmp_eq_i16 vcc, vcc_hi, v255
 v_cmp_eq_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_eq_i16_e64
 
-v_cmp_eq_i16 vcc_lo, v1, v255
+v_cmp_eq_i16 vcc, v1, v255
 // GFX11: v_cmp_eq_i16_e64
 
-v_cmp_eq_i16 vcc_lo, v127, v255
+v_cmp_eq_i16 vcc, v127, v255
 // GFX11: v_cmp_eq_i16_e64
 
-v_cmp_eq_i16 vcc_lo, vcc_hi, v255
+v_cmp_eq_i16 vcc, vcc_hi, v255
 // GFX11: v_cmp_eq_i16_e64
 
-v_cmp_eq_i16 vcc_lo, vcc_lo, v255
+v_cmp_eq_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_eq_i16_e64
 
 v_cmp_eq_u16 vcc, v1, v255
@@ -81,16 +81,16 @@ v_cmp_eq_u16 vcc, vcc_hi, v255
 v_cmp_eq_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_eq_u16_e64
 
-v_cmp_eq_u16 vcc_lo, v1, v255
+v_cmp_eq_u16 vcc, v1, v255
 // GFX11: v_cmp_eq_u16_e64
 
-v_cmp_eq_u16 vcc_lo, v127, v255
+v_cmp_eq_u16 vcc, v127, v255
 // GFX11: v_cmp_eq_u16_e64
 
-v_cmp_eq_u16 vcc_lo, vcc_hi, v255
+v_cmp_eq_u16 vcc, vcc_hi, v255
 // GFX11: v_cmp_eq_u16_e64
 
-v_cmp_eq_u16 vcc_lo, vcc_lo, v255
+v_cmp_eq_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_eq_u16_e64
 
 v_cmp_f_f16 vcc, v1, v255
@@ -105,16 +105,16 @@ v_cmp_f_f16 vcc, vcc_hi, v255
 v_cmp_f_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_f_f16_e64
 
-v_cmp_f_f16 vcc_lo, v1, v255
+v_cmp_f_f16 vcc, v1, v255
 // GFX11: v_cmp_f_f16_e64
 
-v_cmp_f_f16 vcc_lo, v127, v255
+v_cmp_f_f16 vcc, v127, v255
 // GFX11: v_cmp_f_f16_e64
 
-v_cmp_f_f16 vcc_lo, vcc_hi, v255
+v_cmp_f_f16 vcc, vcc_hi, v255
 // GFX11: v_cmp_f_f16_e64
 
-v_cmp_f_f16 vcc_lo, vcc_lo, v255
+v_cmp_f_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_f_f16_e64
 
 v_cmp_ge_f16 vcc, v1, v255
@@ -129,16 +129,16 @@ v_cmp_ge_f16 vcc, vcc_hi, v255
 v_cmp_ge_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_ge_f16_e64
 
-v_cmp_ge_f16 vcc_lo, v1, v255
+v_cmp_ge_f16 vcc, v1, v255
 // GFX11: v_cmp_ge_f16_e64
 
-v_cmp_ge_f16 vcc_lo, v127, v255
+v_cmp_ge_f16 vcc, v127, v255
 // GFX11: v_cmp_ge_f16_e64
 
-v_cmp_ge_f16 vcc_lo, vcc_hi, v255
+v_cmp_ge_f16 vcc, vcc_hi, v255
 // GFX11: v_cmp_ge_f16_e64
 
-v_cmp_ge_f16 vcc_lo, vcc_lo, v255
+v_cmp_ge_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_ge_f16_e64
 
 v_cmp_ge_i16 vcc, v1, v255
@@ -153,16 +153,16 @@ v_cmp_ge_i16 vcc, vcc_hi, v255
 v_cmp_ge_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_ge_i16_e64
 
-v_cmp_ge_i16 vcc_lo, v1, v255
+v_cmp_ge_i16 vcc, v1, v255
 // GFX11: v_cmp_ge_i16_e64
 
-v_cmp_ge_i16 vcc_lo, v127, v255
+v_cmp_ge_i16 vcc, v127, v255
 // GFX11: v_cmp_ge_i16_e64
 
-v_cmp_ge_i16 vcc_lo, vcc_hi, v255
+v_cmp_ge_i16 vcc, vcc_hi, v255
 // GFX11: v_cmp_ge_i16_e64
 
-v_cmp_ge_i16 vcc_lo, vcc_lo, v255
+v_cmp_ge_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_ge_i16_e64
 
 v_cmp_ge_u16 vcc, v1, v255
@@ -177,16 +177,16 @@ v_cmp_ge_u16 vcc, vcc_hi, v255
 v_cmp_ge_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_ge_u16_e64
 
-v_cmp_ge_u16 vcc_lo, v1, v255
+v_cmp_ge_u16 vcc, v1, v255
 // GFX11: v_cmp_ge_u16_e64
 
-v_cmp_ge_u16 vcc_lo, v127, v255
+v_cmp_ge_u16 vcc, v127, v255
 // GFX11: v_cmp_ge_u16_e64
 
-v_cmp_ge_u16 vcc_lo, vcc_hi, v255
+v_cmp_ge_u16 vcc, vcc_hi, v255
 // GFX11: v_cmp_ge_u16_e64
 
-v_cmp_ge_u16 vcc_lo, vcc_lo, v255
+v_cmp_ge_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_ge_u16_e64
 
 v_cmp_gt_f16 vcc, v1, v255
@@ -201,16 +201,16 @@ v_cmp_gt_f16 vcc, vcc_hi, v255
 v_cmp_gt_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_gt_f16_e64
 
-v_cmp_gt_f16 vcc_lo, v1, v255
+v_cmp_gt_f16 vcc, v1, v255
 // GFX11: v_cmp_gt_f16_e64
 
-v_cmp_gt_f16 vcc_lo, v127, v255
+v_cmp_gt_f16 vcc, v127, v255
 // GFX11: v_cmp_gt_f16_e64
 
-v_cmp_gt_f16 vcc_lo, vcc_hi, v255
+v_cmp_gt_f16 vcc, vcc_hi, v255
 // GFX11: v_cmp_gt_f16_e64
 
-v_cmp_gt_f16 vcc_lo, vcc_lo, v255
+v_cmp_gt_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_gt_f16_e64
 
 v_cmp_gt_i16 vcc, v1, v255
@@ -225,16 +225,16 @@ v_cmp_gt_i16 vcc, vcc_hi, v255
 v_cmp_gt_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_gt_i16_e64
 
-v_cmp_gt_i16 vcc_lo, v1, v255
+v_cmp_gt_i16 vcc, v1, v255
 // GFX11: v_cmp_gt_i16_e64
 
-v_cmp_gt_i16 vcc_lo, v127, v255
+v_cmp_gt_i16 vcc, v127, v255
 // GFX11: v_cmp_gt_i16_e64
 
-v_cmp_gt_i16 vcc_lo, vcc_hi, v255
+v_cmp_gt_i16 vcc, vcc_hi, v255
 // GFX11: v_cmp_gt_i16_e64
 
-v_cmp_gt_i16 vcc_lo, vcc_lo, v255
+v_cmp_gt_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_gt_i16_e64
 
 v_cmp_gt_u16 vcc, v1, v255
@@ -249,16 +249,16 @@ v_cmp_gt_u16 vcc, vcc_hi, v255
 v_cmp_gt_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_gt_u16_e64
 
-v_cmp_gt_u16 vcc_lo, v1, v255
+v_cmp_gt_u16 vcc, v1, v255
 // GFX11: v_cmp_gt_u16_e64
 
-v_cmp_gt_u16 vcc_lo, v127, v255
+v_cmp_gt_u16 vcc, v127, v255
 // GFX11: v_cmp_gt_u16_e64
 
-v_cmp_gt_u16 vcc_lo, vcc_hi, v255
+v_cmp_gt_u16 vcc, vcc_hi, v255
 // GFX11: v_cmp_gt_u16_e64
 
-v_cmp_gt_u16 vcc_lo, vcc_lo, v255
+v_cmp_gt_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_gt_u16_e64
 
 v_cmp_le_f16 vcc, v1, v255
@@ -273,16 +273,16 @@ v_cmp_le_f16 vcc, vcc_hi, v255
 v_cmp_le_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_le_f16_e64
 
-v_cmp_le_f16 vcc_lo, v1, v255
+v_cmp_le_f16 vcc, v1, v255
 // GFX11: v_cmp_le_f16_e64
 
-v_cmp_le_f16 vcc_lo, v127, v255
+v_cmp_le_f16 vcc, v127, v255
 // GFX11: v_cmp_le_f16_e64
 
-v_cmp_le_f16 vcc_lo, vcc_hi, v255
+v_cmp_le_f16 vcc, vcc_hi, v255
 // GFX11: v_cmp_le_f16_e64
 
-v_cmp_le_f16 vcc_lo, vcc_lo, v255
+v_cmp_le_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_le_f16_e64
 
 v_cmp_le_i16 vcc, v1, v255
@@ -297,16 +297,16 @@ v_cmp_le_i16 vcc, vcc_hi, v255
 v_cmp_le_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_le_i16_e64
 
-v_cmp_le_i16 vcc_lo, v1, v255
+v_cmp_le_i16 vcc, v1, v255
 // GFX11: v_cmp_le_i16_e64
 
-v_cmp_le_i16 vcc_lo, v127, v255
+v_cmp_le_i16 vcc, v127, v255
 // GFX11: v_cmp_le_i16_e64
 
-v_cmp_le_i16 vcc_lo, vcc_hi, v255
+v_cmp_le_i16 vcc, vcc_hi, v255
 // GFX11: v_cmp_le_i16_e64
 
-v_cmp_le_i16 vcc_lo, vcc_lo, v255
+v_cmp_le_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_le_i16_e64
 
 v_cmp_le_u16 vcc, v1, v255
@@ -321,16 +321,16 @@ v_cmp_le_u16 vcc, vcc_hi, v255
 v_cmp_le_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_le_u16_e64
 
-v_cmp_le_u16 vcc_lo, v1, v255
+v_cmp_le_u16 vcc, v1, v255
 // GFX11: v_cmp_le_u16_e64
 
-v_cmp_le_u16 vcc_lo, v127, v255
+v_cmp_le_u16 vcc, v127, v255
 // GFX11: v_cmp_le_u16_e64
 
-v_cmp_le_u16 vcc_lo, vcc_hi, v255
+v_cmp_le_u16 vcc, vcc_hi, v255
 // GFX11: v_cmp_le_u16_e64
 
-v_cmp_le_u16 vcc_lo, vcc_lo, v255
+v_cmp_le_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_le_u16_e64
 
 v_cmp_lg_f16 vcc, v1, v255
@@ -345,16 +345,16 @@ v_cmp_lg_f16 vcc, vcc_hi, v255
 v_cmp_lg_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_lg_f16_e64
 
-v_cmp_lg_f16 vcc_lo, v1, v255
+v_cmp_lg_f16 vcc, v1, v255
 // GFX11: v_cmp_lg_f16_e64
 
-v_cmp_lg_f16 vcc_lo, v127, v255
+v_cmp_lg_f16 vcc, v127, v255
 // GFX11: v_cmp_lg_f16_e64
 
-v_cmp_lg_f16 vcc_lo, vcc_hi, v255
+v_cmp_lg_f16 vcc, vcc_hi, v255
 // GFX11: v_cmp_lg_f16_e64
 
-v_cmp_lg_f16 vcc_lo, vcc_lo, v255
+v_cmp_lg_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_lg_f16_e64
 
 v_cmp_lt_f16 vcc, v1, v255
@@ -369,16 +369,16 @@ v_cmp_lt_f16 vcc, vcc_hi, v255
 v_cmp_lt_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_lt_f16_e64
 
-v_cmp_lt_f16 vcc_lo, v1, v255
+v_cmp_lt_f16 vcc, v1, v255
 // GFX11: v_cmp_lt_f16_e64
 
-v_cmp_lt_f16 vcc_lo, v127, v255
+v_cmp_lt_f16 vcc, v127, v255
 // GFX11: v_cmp_lt_f16_e64
 
-v_cmp_lt_f16 vcc_lo, vcc_hi, v255
+v_cmp_lt_f16 vcc, vcc_hi, v255
 // GFX11: v_cmp_lt_f16_e64
 
-v_cmp_lt_f16 vcc_lo, vcc_lo, v255
+v_cmp_lt_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_lt_f16_e64
 
 v_cmp_lt_i16 vcc, v1, v255
@@ -393,16 +393,16 @@ v_cmp_lt_i16 vcc, vcc_hi, v255
 v_cmp_lt_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_lt_i16_e64
 
-v_cmp_lt_i16 vcc_lo, v1, v255
+v_cmp_lt_i16 vcc, v1, v255
 // GFX11: v_cmp_lt_i16_e64
 
-v_cmp_lt_i16 vcc_lo, v127, v255
+v_cmp_lt_i16 vcc, v127, v255
 // GFX11: v_cmp_lt_i16_e64
 
-v_cmp_lt_i16 vcc_lo, vcc_hi, v255
+v_cmp_lt_i16 vcc, vcc_hi, v255
 // GFX11: v_cmp_lt_i16_e64
 
-v_cmp_lt_i16 vcc_lo, vcc_lo, v255
+v_cmp_lt_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_lt_i16_e64
 
 v_cmp_lt_u16 vcc, v1, v255
@@ -417,16 +417,16 @@ v_cmp_lt_u16 vcc, vcc_hi, v255
 v_cmp_lt_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_lt_u16_e64
 
-v_cmp_lt_u16 vcc_lo, v1, v255
+v_cmp_lt_u16 vcc, v1, v255
 // GFX11: v_cmp_lt_u16_e64
 
-v_cmp_lt_u16 vcc_lo, v127, v255
+v_cmp_lt_u16 vcc, v127, v255
 // GFX11: v_cmp_lt_u16_e64
 
-v_cmp_lt_u16 vcc_lo, vcc_hi, v255
+v_cmp_lt_u16 vcc, vcc_hi, v255
 // GFX11: v_cmp_lt_u16_e64
 
-v_cmp_lt_u16 vcc_lo, vcc_lo, v255
+v_cmp_lt_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_lt_u16_e64
 
 v_cmp_ne_i16 vcc, v1, v255
@@ -441,16 +441,16 @@ v_cmp_ne_i16 vcc, vcc_hi, v255
 v_cmp_ne_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_ne_i16_e64
 
-v_cmp_ne_i16 vcc_lo, v1, v255
+v_cmp_ne_i16 vcc, v1, v255
 // GFX11: v_cmp_ne_i16_e64
 
-v_cmp_ne_i16 vcc_lo, v127, v255
+v_cmp_ne_i16 vcc, v127, v255
 // GFX11: v_cmp_ne_i16_e64
 
-v_cmp_ne_i16 vcc_lo, vcc_hi, v255
+v_cmp_ne_i16 vcc, vcc_hi, v255
 // GFX11: v_cmp_ne_i16_e64
 
-v_cmp_ne_i16 vcc_lo, vcc_lo, v255
+v_cmp_ne_i16 vcc, vcc_lo, v255
 // GFX11: v_cmp_ne_i16_e64
 
 v_cmp_ne_u16 vcc, v1, v255
@@ -465,16 +465,16 @@ v_cmp_ne_u16 vcc, vcc_hi, v255
 v_cmp_ne_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_ne_u16_e64
 
-v_cmp_ne_u16 vcc_lo, v1, v255
+v_cmp_ne_u16 vcc, v1, v255
 // GFX11: v_cmp_ne_u16_e64
 
-v_cmp_ne_u16 vcc_lo, v127, v255
+v_cmp_ne_u16 vcc, v127, v255
 // GFX11: v_cmp_ne_u16_e64
 
-v_cmp_ne_u16 vcc_lo, vcc_hi, v255
+v_cmp_ne_u16 vcc, vcc_hi, v255
 // GFX11: v_cmp_ne_u16_e64
 
-v_cmp_ne_u16 vcc_lo, vcc_lo, v255
+v_cmp_ne_u16 vcc, vcc_lo, v255
 // GFX11: v_cmp_ne_u16_e64
 
 v_cmp_neq_f16 vcc, v1, v255
@@ -489,16 +489,16 @@ v_cmp_neq_f16 vcc, vcc_hi, v255
 v_cmp_neq_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_neq_f16_e64
 
-v_cmp_neq_f16 vcc_lo, v1, v255
+v_cmp_neq_f16 vcc, v1, v255
 // GFX11: v_cmp_neq_f16_e64
 
-v_cmp_neq_f16 vcc_lo, v127, v255
+v_cmp_neq_f16 vcc, v127, v255
 // GFX11: v_cmp_neq_f16_e64
 
-v_cmp_neq_f16 vcc_lo, vcc_hi, v255
+v_cmp_neq_f16 vcc, vcc_hi, v255
 // GFX11: v_cmp_neq_f16_e64
 
-v_cmp_neq_f16 vcc_lo, vcc_lo, v255
+v_cmp_neq_f16 vcc, vcc_lo, v255
 // GFX11: v_cmp_neq_f16_e64
 
 v_cmp_nge_f16 vcc, v1, v255
@@ -513,16 +5...
[truncated]

rampitec · 2024-07-10T22:00:36Z

llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll

@@ -1,5 +1,3 @@
-; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,-wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:


This diagnostic is now gone: the backend (BE) will just initialize the wave size to the default. If we want to catch this case, we would still need to scan the original string.

rampitec · 2024-07-10T22:01:54Z

I did not remove all of the -mattr=-wavefrontsize32,+wavefrontsize64 occurrences from the many tests that use them; this can be done separately. I only fixed some of them in order to test this change.

rampitec · 2024-07-10T22:03:13Z

Also note that clang and flang do the same thing by calling AMDGPU::insertWaveSizeFeature() from TargetParser.cpp.

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

shiltian

Do those clang tests need to be updated as well?

rampitec · 2024-07-10T23:24:24Z

Do those clang tests need to be updated as well?

No, FE is unaffected. But let's see what github testers may find.

arsenm · 2024-07-11T05:36:49Z

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

+  if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
+      !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
+    if (getGeneration() >= AMDGPUSubtarget::GFX10)
+      ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+    else
+      ToggleFeature(AMDGPU::FeatureWavefrontSize64);


I think we can avoid this by having a separate SupportsWave32 feature implied by FeatureWavefrontSize32

We still need to select default somewhere. So say a subtarget has FeatureSupportsWave32, then what?

I.e. the line if (getGeneration() >= AMDGPUSubtarget::GFX10) can be changed to if (hasFeature(AMDGPU::FeatureSupportsWave32)), but that's it.

I should also mention that this `if` can be avoided altogether, because subtargets without an option to select the wave size simply have the wave64 feature hard-wired in their definition. The `if` here is only there to catch cases where tests manually switch it off with -mattr, but I can remove it and only set wave32 as a default.

Just did that, unconditionally add wave32 if none is set.

I mean if you use the implies feature, the feature parsing logic should flip the incompatible case for you instead of manually doing it here

I do not see a way to tell that it is incompatible in the first place. OK, wave32 implies that the target supports wave32. How does that turn wave32 on automatically? How does it tell that wave64 is incompatible with wave32?

IMHO to do what you want features must be:

Organized in groups.

We need a way to tell that the features in a group are mutually exclusive.

There must be a way to tell which one is the default.

None of that exists.

I mean if there's a SupportsWave32 feature, implied by FeatureWavefrontSize32, if you specify wavefrontsize32 to a wave64-only target, the incompatible feature check in the parsing logic will hit and it will assume you specified an invalid target and unset the target-cpu.

Older targets just have FeatureWavefrontSize64 in their definitions.

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

arsenm · 2024-07-11T08:26:11Z

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

+                                                 MCContext &Ctx) {
+  if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
+      !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+    MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);


Don't really understand this subtargetcopy business

STI is const, I cannot just flip the bit. The same is done in the AsmParser with copySTI().

arsenm · 2024-07-11T08:27:03Z

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

+  if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
+      !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
+    if (getGeneration() >= AMDGPUSubtarget::GFX10)
+      ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+    else
+      ToggleFeature(AMDGPU::FeatureWavefrontSize64);


I mean if you use the implies feature, the feature parsing logic should flip the incompatible case for you instead of manually doing it here

rampitec · 2024-07-15T19:28:19Z

Ping

Summary: Processor definition shall not include a default feature which may be switched off by a different wave size. This allows not to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D59822435

Those are not needed now that <llvm#98400> is submitted.

…100339) Those are not needed now that <#98400> is submitted.

Summary: Processor definition shall not include a default feature which may be switched off by a different wave size. This allows not to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251504

…100339) Summary: Those are not needed now that <#98400> is submitted. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250755

They are no longer needed after the patch: [AMDGPU] Remove wavefrontsize feature from GFX10: llvm#98400 Note that they may still be needed if "target-features" are set to "+wavefrontsize32" or "+wavefrontsize64".

…ts (NFC) (#100711) They are no longer needed after the patch: [AMDGPU] Remove wavefrontsize feature from GFX10: #98400 The exception is when "target-features" are set to "+wavefrontsize32" or "+wavefrontsize64": we still need to remove a wavefrontsize feature before adding a different one, to make sure only one of them is present.

…lvm#100339) Those are not needed now that <llvm#98400> is submitted.

) Change-Id: Ic732c0ac93b9767f2b194c6a825165e3709314fc

This reverts commit b132dd4.

[AMDGPU] Remove wavefrontsize feature from GFX10+

ac6a483

A processor definition should not include a default wavefront-size feature that has to be switched off when a different wave size is selected. This makes it unnecessary to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests.

rampitec requested review from jayfoad, arsenm and kosarev July 10, 2024 21:58

llvmbot added backend:AMDGPU mc Machine (object) code labels Jul 10, 2024

rampitec mentioned this pull request Jul 10, 2024

[AMDGPU] Remove default wavefrontsize attribute in asm parser #97617

Closed

rampitec commented Jul 10, 2024

View reviewed changes

shiltian reviewed Jul 10, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Outdated Show resolved Hide resolved

shiltian reviewed Jul 10, 2024

View reviewed changes

arsenm reviewed Jul 11, 2024

View reviewed changes

rampitec added 2 commits July 11, 2024 00:57

Just add FeatureWavefrontSize32 unconditionally

55dda8d

Older targets just have FeatureWavefrontSize64 in their definitions.

Move getIsaVersion to its original place

d09e31c

arsenm reviewed Jul 11, 2024

View reviewed changes

rampitec added 2 commits July 11, 2024 01:30

Readded braces.

f5ebe7c

Updated comment

51b32be

arsenm approved these changes Jul 16, 2024

View reviewed changes

rampitec merged commit b132dd4 into llvm:main Jul 16, 2024
7 checks passed

rampitec deleted the default-wavesize branch July 16, 2024 08:02

kosarev mentioned this pull request Jul 24, 2024

[AMDGPU][MC][NFC] Drop remaining -wavesize32/64 attributes in tests. #100339

Merged

kosarev added a commit to kosarev/llvm-project that referenced this pull request Jul 24, 2024

[AMDGPU][MC][NFC] Drop remaining -wavesize32/64 attributes in tests.

b5498b8

Those are not needed now that <llvm#98400> is submitted.

kosarev added a commit that referenced this pull request Jul 24, 2024

[AMDGPU][MC][NFC] Drop remaining -wavesize32/64 attributes in tests. (#…

e1052fa

…100339) Those are not needed now that <#98400> is submitted.

changpeng mentioned this pull request Jul 26, 2024

[AMDGPU] Remove -wavefrontsize32 and -wavefrontsize64 from GFX10+ tests (NFC) #100711

Merged

Harini0924 pushed a commit to Harini0924/llvm-project that referenced this pull request Aug 1, 2024

[AMDGPU][MC][NFC] Drop remaining -wavesize32/64 attributes in tests. (l…

1844b7d

…lvm#100339) Those are not needed now that <llvm#98400> is submitted.

searlmc1 pushed a commit to ROCm/llvm-project that referenced this pull request Aug 28, 2024

Cherrypick [AMDGPU] Remove wavefrontsize feature from GFX10+ (llvm#98400

d794f6e

) Change-Id: Ic732c0ac93b9767f2b194c6a825165e3709314fc

jrbyrnes pushed a commit to jrbyrnes/llvm-project that referenced this pull request Oct 2, 2024

Revert "[AMDGPU] Remove wavefrontsize feature from GFX10+ (llvm#98400)"

d58114a

This reverts commit b132dd4.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AMDGPU] Remove wavefrontsize feature from GFX10+ #98400

[AMDGPU] Remove wavefrontsize feature from GFX10+ #98400

rampitec commented Jul 10, 2024

llvmbot commented Jul 10, 2024 •

edited

Loading

rampitec Jul 10, 2024

rampitec commented Jul 10, 2024

rampitec commented Jul 10, 2024

shiltian left a comment

rampitec commented Jul 10, 2024 •

edited

Loading

arsenm Jul 11, 2024

rampitec Jul 11, 2024

rampitec Jul 11, 2024

rampitec Jul 11, 2024

arsenm Jul 11, 2024

rampitec Jul 11, 2024

rampitec Jul 11, 2024

arsenm Jul 16, 2024

arsenm Jul 11, 2024

rampitec Jul 11, 2024

arsenm Jul 11, 2024

rampitec commented Jul 15, 2024

		@@ -1,5 +1,3 @@
		; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,-wavefrontsize64 < %s 2>&1 \| FileCheck %s -check-prefix=ERR -implicit-check-not=error:

[AMDGPU] Remove wavefrontsize feature from GFX10+ #98400

[AMDGPU] Remove wavefrontsize feature from GFX10+ #98400

Conversation

rampitec commented Jul 10, 2024

llvmbot commented Jul 10, 2024 • edited Loading

Choose a reason for hiding this comment

rampitec commented Jul 10, 2024

rampitec commented Jul 10, 2024

shiltian left a comment

Choose a reason for hiding this comment

rampitec commented Jul 10, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

rampitec commented Jul 15, 2024

llvmbot commented Jul 10, 2024 •

edited

Loading

rampitec commented Jul 10, 2024 •

edited

Loading