Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV] Assign separate PseudoVSHA2MS_VV opcodes for each SEW #114317

Open
wants to merge 2 commits into
base: main
Choose a base branch
from

Conversation

mshockwave
Copy link
Member

The vsha2ms.vv instruction from Zvknh[ab] currently supports both SEW=32 and SEW=64. On some processors it might have different performance characteristics depending on the SEW. This patch splits the two SEWs into their own VPseudo opcodes and scheduling classes.

This is effectively an NFC change.

The vsha2ms.vv instruction from Zvknh[ab] currently supports both SEW=32
and SEW=64. On some processors it might have different performance
characteristics depending on the SEW. This patch splits the two SEWs into
their own VPseudo opcodes and scheduling classes.

This is effectively an NFC change.
@llvmbot
Copy link
Collaborator

llvmbot commented Oct 30, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Min-Yih Hsu (mshockwave)

Changes

The vsha2ms.vv instruction from Zvknh[ab] currently supports both SEW=32 and SEW=64. On some processors it might have different performance characteristics depending on the SEW. This patch splits the two SEWs into their own VPseudo opcodes and scheduling classes.

This is effectively an NFC change.


Full diff: https://github.com/llvm/llvm-project/pull/114317.diff

5 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td (+5-4)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td (+18-12)
  • (modified) llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td (+3-2)
  • (modified) llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td (+3-2)
  • (modified) llvm/lib/Target/RISCV/RISCVScheduleZvk.td (+4-4)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index d5b0fa340684b4..be8bc94a76ff0a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -4454,21 +4454,22 @@ class VPatTernaryNoMaskWithPolicy<string intrinsic,
                                   ValueType result_type,
                                   ValueType op1_type,
                                   ValueType op2_type,
-                                  int sew,
+                                  int log2sew,
                                   LMULInfo vlmul,
                                   VReg result_reg_class,
                                   RegisterClass op1_reg_class,
-                                  DAGOperand op2_kind> :
+                                  DAGOperand op2_kind,
+                                  bit sew_aware = false> :
   Pat<(result_type (!cast<Intrinsic>(intrinsic)
                     (result_type result_reg_class:$rs3),
                     (op1_type op1_reg_class:$rs1),
                     (op2_type op2_kind:$rs2),
                     VLOpFrag, (XLenVT timm:$policy))),
-                   (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+                   (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#!if(sew_aware, "_E"#!shl(1, log2sew), ""))
                     result_reg_class:$rs3,
                     (op1_type op1_reg_class:$rs1),
                     op2_kind:$rs2,
-                    GPR:$vl, sew, (XLenVT timm:$policy))>;
+                    GPR:$vl, log2sew, (XLenVT timm:$policy))>;
 
 class VPatTernaryNoMaskWithPolicyRoundingMode<string intrinsic,
                                   string inst,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index 782651fd6d0197..93020d3fc5aecc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -274,9 +274,11 @@ multiclass VPseudoBinaryNoMaskPolicy_Zvk<VReg RetClass,
 multiclass VPseudoTernaryNoMask_Zvk<VReg RetClass,
                                     VReg Op1Class,
                                     DAGOperand Op2Class,
-                                    LMULInfo MInfo> {
-  let VLMul = MInfo.value in
-    def "_" # MInfo.MX : VPseudoTernaryNoMask_Zvk<RetClass, Op1Class, Op2Class>;
+                                    LMULInfo MInfo, int sew = 0> {
+  let VLMul = MInfo.value, SEW = sew in {
+    defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
+    def suffix : VPseudoTernaryNoMask_Zvk<RetClass, Op1Class, Op2Class>;
+  }
 }
 
 multiclass VPseudoBinaryV_V_NoMask_Zvk<LMULInfo m> {
@@ -348,12 +350,12 @@ multiclass VPseudoVSHA2CL {
   }
 }
 
-multiclass VPseudoVSHA2MS {
-  foreach m = MxListVF4 in {
+multiclass VPseudoVSHA2MS<int sew = 0> {
+  foreach m = !if(!eq(sew, 64), MxListVF8, MxListVF4) in {
     defvar mx = m.MX;
-    defm _VV : VPseudoTernaryNoMask_Zvk<m.vrclass, m.vrclass, m.vrclass, m>,
+    defm _VV : VPseudoTernaryNoMask_Zvk<m.vrclass, m.vrclass, m.vrclass, m, sew = sew>,
                SchedTernary<"WriteVSHA2MSV", "ReadVSHA2MSV", "ReadVSHA2MSV",
-                            "ReadVSHA2MSV", mx>;
+                            "ReadVSHA2MSV", mx, sew>;
   }
 }
 
@@ -564,7 +566,9 @@ let Predicates = [HasStdExtZvkned] in {
 let Predicates = [HasStdExtZvknhaOrZvknhb] in {
   defm PseudoVSHA2CH : VPseudoVSHA2CH;
   defm PseudoVSHA2CL : VPseudoVSHA2CL;
-  defm PseudoVSHA2MS : VPseudoVSHA2MS;
+  defm PseudoVSHA2MS : VPseudoVSHA2MS<sew=32>;
+  let Predicates = [HasStdExtZvknhb] in
+  defm PseudoVSHA2MS : VPseudoVSHA2MS<sew=64>;
 } // Predicates = [HasStdExtZvknhaOrZvknhb]
 
 let Predicates = [HasStdExtZvksed] in {
@@ -944,12 +948,14 @@ multiclass VPatUnaryV_V_S_NoMask_Zvk<string intrinsic, string instruction,
 }
 
 multiclass VPatBinaryV_VV_NoMask<string intrinsic, string instruction,
-                                 list<VTypeInfo> vtilist> {
+                                 list<VTypeInfo> vtilist,
+                                 bit isSEWAware = false> {
   foreach vti = vtilist in
     def : VPatTernaryNoMaskWithPolicy<intrinsic, instruction, "VV",
                                       vti.Vector, vti.Vector, vti.Vector,
                                       vti.Log2SEW, vti.LMul, vti.RegClass,
-                                      vti.RegClass, vti.RegClass>;
+                                      vti.RegClass, vti.RegClass,
+                                      sew_aware = isSEWAware>;
 }
 
 multiclass VPatBinaryV_VI_NoMask<string intrinsic, string instruction,
@@ -1101,13 +1107,13 @@ let Predicates = [HasStdExtZvkned] in {
 let Predicates = [HasStdExtZvknha] in {
   defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32IntegerVectors>;
   defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32IntegerVectors>;
-  defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors>;
+  defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors, isSEWAware=true>;
 } // Predicates = [HasStdExtZvknha]
 
 let Predicates = [HasStdExtZvknhb] in {
   defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32I64IntegerVectors>;
   defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32I64IntegerVectors>;
-  defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors>;
+  defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors, isSEWAware=true>;
 } // Predicates = [HasStdExtZvknhb]
 
 let Predicates = [HasStdExtZvksed] in {
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
index 7a54d2fe108080..1af89903e0068c 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
@@ -883,7 +883,8 @@ foreach mx = SchedMxList in {
   let Latency = 3, ReleaseAtCycles = [LMulLat] in {
     defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
-    defm "" : LMULWriteResMX<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
+    foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
+    defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
   }
   // Zvkned
   let Latency = 2, ReleaseAtCycles = [LMulLat] in {
@@ -1213,7 +1214,7 @@ defm "" : LMULReadAdvance<"ReadVGMULV", 0>;
 // Zvknha or Zvknhb
 defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
 defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
-defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
 // Zvkned
 defm "" : LMULReadAdvance<"ReadVAESMVV", 0>;
 defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index c685a6d2b094be..51aa003139fbad 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -791,7 +791,8 @@ foreach mx = SchedMxList in {
   let Latency = 3, ReleaseAtCycles = [LMulLat] in {
     defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorArith], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorArith], mx, IsWorstCase>;
-    defm "" : LMULWriteResMX<"WriteVSHA2MSV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+    foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
+    defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
   }
   // Zvkned
   let Latency = 2, ReleaseAtCycles = [LMulLat] in {
@@ -1119,7 +1120,7 @@ defm "" : LMULReadAdvance<"ReadVGMULV", 0>;
 // Zvknha or Zvknhb
 defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
 defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
-defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
 // Zvkned
 defm "" : LMULReadAdvance<"ReadVAESMVV", 0>;
 defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleZvk.td b/llvm/lib/Target/RISCV/RISCVScheduleZvk.td
index 640c456322f022..62d9bab0fac85f 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleZvk.td
@@ -36,7 +36,7 @@ defm "" : LMULSchedWrites<"WriteVGMULV">;
 /// Zvknha or Zvknhb extensions
 defm "" : LMULSchedWrites<"WriteVSHA2CHV">;
 defm "" : LMULSchedWrites<"WriteVSHA2CLV">;
-defm "" : LMULSchedWrites<"WriteVSHA2MSV">;
+defm "" : LMULSEWSchedWrites<"WriteVSHA2MSV">;
 
 /// Zvkned extension
 defm "" : LMULSchedWrites<"WriteVAESMVV">;
@@ -79,7 +79,7 @@ defm "" : LMULSchedReads<"ReadVGMULV">;
 /// Zvknha or Zvknhb extensions
 defm "" : LMULSchedReads<"ReadVSHA2CHV">;
 defm "" : LMULSchedReads<"ReadVSHA2CLV">;
-defm "" : LMULSchedReads<"ReadVSHA2MSV">;
+defm "" : LMULSEWSchedReads<"ReadVSHA2MSV">;
 
 /// Zvkned extension
 defm "" : LMULSchedReads<"ReadVAESMVV">;
@@ -153,11 +153,11 @@ multiclass UnsupportedSchedZvknhaOrZvknhb {
 let Unsupported = true in {
 defm "" : LMULWriteRes<"WriteVSHA2CHV", []>;
 defm "" : LMULWriteRes<"WriteVSHA2CLV", []>;
-defm "" : LMULWriteRes<"WriteVSHA2MSV", []>;
+defm "" : LMULSEWWriteRes<"WriteVSHA2MSV", []>;
 
 defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
 defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
-defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
 }
 }
 

@@ -564,7 +566,9 @@ let Predicates = [HasStdExtZvkned] in {
 let Predicates = [HasStdExtZvknhaOrZvknhb] in {
   defm PseudoVSHA2CH : VPseudoVSHA2CH;
   defm PseudoVSHA2CL : VPseudoVSHA2CL;
-  defm PseudoVSHA2MS : VPseudoVSHA2MS;
+  defm PseudoVSHA2MS : VPseudoVSHA2MS<sew=32>;
+  let Predicates = [HasStdExtZvknhb] in
Copy link
Collaborator

@topperc topperc Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which `let` wins here: this one, or the one on line 566?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one (line 570, the inner `let Predicates = [HasStdExtZvknhb]`) wins.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants