-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Assign separate PseudoVSHA2MS_VV opcodes for each SEW #114317
base: main
Are you sure you want to change the base?
[RISCV] Assign separate PseudoVSHA2MS_VV opcodes for each SEW #114317
Conversation
The vsha2ms.vv from Zvknh[ab] currently supports both SEW=32 and SEW=64. It might have different performance characteristics depending on the SEW on some processors. This patch splits these two different SEWs into their own VPseudo opcodes and scheduling classes. This is effectively an NFC change.
@llvm/pr-subscribers-backend-risc-v Author: Min-Yih Hsu (mshockwave) Changes: The vsha2ms.vv from Zvknh[ab] currently supports both SEW=32 and SEW=64. It might have different performance characteristics depending on the SEW on some processors. This patch splits these two different SEWs into their own VPseudo opcodes and scheduling classes. This is effectively an NFC change. Full diff: https://github.com/llvm/llvm-project/pull/114317.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index d5b0fa340684b4..be8bc94a76ff0a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -4454,21 +4454,22 @@ class VPatTernaryNoMaskWithPolicy<string intrinsic,
ValueType result_type,
ValueType op1_type,
ValueType op2_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
RegisterClass op1_reg_class,
- DAGOperand op2_kind> :
+ DAGOperand op2_kind,
+ bit sew_aware = false> :
Pat<(result_type (!cast<Intrinsic>(intrinsic)
(result_type result_reg_class:$rs3),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#!if(sew_aware, "_E"#!shl(1, log2sew), ""))
result_reg_class:$rs3,
(op1_type op1_reg_class:$rs1),
op2_kind:$rs2,
- GPR:$vl, sew, (XLenVT timm:$policy))>;
+ GPR:$vl, log2sew, (XLenVT timm:$policy))>;
class VPatTernaryNoMaskWithPolicyRoundingMode<string intrinsic,
string inst,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index 782651fd6d0197..93020d3fc5aecc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -274,9 +274,11 @@ multiclass VPseudoBinaryNoMaskPolicy_Zvk<VReg RetClass,
multiclass VPseudoTernaryNoMask_Zvk<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
- LMULInfo MInfo> {
- let VLMul = MInfo.value in
- def "_" # MInfo.MX : VPseudoTernaryNoMask_Zvk<RetClass, Op1Class, Op2Class>;
+ LMULInfo MInfo, int sew = 0> {
+ let VLMul = MInfo.value, SEW = sew in {
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
+ def suffix : VPseudoTernaryNoMask_Zvk<RetClass, Op1Class, Op2Class>;
+ }
}
multiclass VPseudoBinaryV_V_NoMask_Zvk<LMULInfo m> {
@@ -348,12 +350,12 @@ multiclass VPseudoVSHA2CL {
}
}
-multiclass VPseudoVSHA2MS {
- foreach m = MxListVF4 in {
+multiclass VPseudoVSHA2MS<int sew = 0> {
+ foreach m = !if(!eq(sew, 64), MxListVF8, MxListVF4) in {
defvar mx = m.MX;
- defm _VV : VPseudoTernaryNoMask_Zvk<m.vrclass, m.vrclass, m.vrclass, m>,
+ defm _VV : VPseudoTernaryNoMask_Zvk<m.vrclass, m.vrclass, m.vrclass, m, sew = sew>,
SchedTernary<"WriteVSHA2MSV", "ReadVSHA2MSV", "ReadVSHA2MSV",
- "ReadVSHA2MSV", mx>;
+ "ReadVSHA2MSV", mx, sew>;
}
}
@@ -564,7 +566,9 @@ let Predicates = [HasStdExtZvkned] in {
let Predicates = [HasStdExtZvknhaOrZvknhb] in {
defm PseudoVSHA2CH : VPseudoVSHA2CH;
defm PseudoVSHA2CL : VPseudoVSHA2CL;
- defm PseudoVSHA2MS : VPseudoVSHA2MS;
+ defm PseudoVSHA2MS : VPseudoVSHA2MS<sew=32>;
+ let Predicates = [HasStdExtZvknhb] in
+ defm PseudoVSHA2MS : VPseudoVSHA2MS<sew=64>;
} // Predicates = [HasStdExtZvknhaOrZvknhb]
let Predicates = [HasStdExtZvksed] in {
@@ -944,12 +948,14 @@ multiclass VPatUnaryV_V_S_NoMask_Zvk<string intrinsic, string instruction,
}
multiclass VPatBinaryV_VV_NoMask<string intrinsic, string instruction,
- list<VTypeInfo> vtilist> {
+ list<VTypeInfo> vtilist,
+ bit isSEWAware = false> {
foreach vti = vtilist in
def : VPatTernaryNoMaskWithPolicy<intrinsic, instruction, "VV",
vti.Vector, vti.Vector, vti.Vector,
vti.Log2SEW, vti.LMul, vti.RegClass,
- vti.RegClass, vti.RegClass>;
+ vti.RegClass, vti.RegClass,
+ sew_aware = isSEWAware>;
}
multiclass VPatBinaryV_VI_NoMask<string intrinsic, string instruction,
@@ -1101,13 +1107,13 @@ let Predicates = [HasStdExtZvkned] in {
let Predicates = [HasStdExtZvknha] in {
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32IntegerVectors>;
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32IntegerVectors>;
- defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32IntegerVectors, isSEWAware=true>;
} // Predicates = [HasStdExtZvknha]
let Predicates = [HasStdExtZvknhb] in {
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ch", "PseudoVSHA2CH", I32I64IntegerVectors>;
defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2cl", "PseudoVSHA2CH", I32I64IntegerVectors>;
- defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors>;
+ defm : VPatBinaryV_VV_NoMask<"int_riscv_vsha2ms", "PseudoVSHA2MS", I32I64IntegerVectors, isSEWAware=true>;
} // Predicates = [HasStdExtZvknhb]
let Predicates = [HasStdExtZvksed] in {
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
index 7a54d2fe108080..1af89903e0068c 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
@@ -883,7 +883,8 @@ foreach mx = SchedMxList in {
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, IsWorstCase>;
+ foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP400VEXQ0], mx, sew, IsWorstCase>;
}
// Zvkned
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
@@ -1213,7 +1214,7 @@ defm "" : LMULReadAdvance<"ReadVGMULV", 0>;
// Zvknha or Zvknhb
defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
-defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
// Zvkned
defm "" : LMULReadAdvance<"ReadVAESMVV", 0>;
defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index c685a6d2b094be..51aa003139fbad 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -791,7 +791,8 @@ foreach mx = SchedMxList in {
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVSHA2CHV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVSHA2CLV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSHA2MSV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ foreach sew = !listremove(SchedSEWSet<mx>.val, [8, 16]) in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVSHA2MSV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
// Zvkned
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
@@ -1119,7 +1120,7 @@ defm "" : LMULReadAdvance<"ReadVGMULV", 0>;
// Zvknha or Zvknhb
defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
-defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
// Zvkned
defm "" : LMULReadAdvance<"ReadVAESMVV", 0>;
defm "" : LMULReadAdvance<"ReadVAESKF1V", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleZvk.td b/llvm/lib/Target/RISCV/RISCVScheduleZvk.td
index 640c456322f022..62d9bab0fac85f 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleZvk.td
@@ -36,7 +36,7 @@ defm "" : LMULSchedWrites<"WriteVGMULV">;
/// Zvknha or Zvknhb extensions
defm "" : LMULSchedWrites<"WriteVSHA2CHV">;
defm "" : LMULSchedWrites<"WriteVSHA2CLV">;
-defm "" : LMULSchedWrites<"WriteVSHA2MSV">;
+defm "" : LMULSEWSchedWrites<"WriteVSHA2MSV">;
/// Zvkned extension
defm "" : LMULSchedWrites<"WriteVAESMVV">;
@@ -79,7 +79,7 @@ defm "" : LMULSchedReads<"ReadVGMULV">;
/// Zvknha or Zvknhb extensions
defm "" : LMULSchedReads<"ReadVSHA2CHV">;
defm "" : LMULSchedReads<"ReadVSHA2CLV">;
-defm "" : LMULSchedReads<"ReadVSHA2MSV">;
+defm "" : LMULSEWSchedReads<"ReadVSHA2MSV">;
/// Zvkned extension
defm "" : LMULSchedReads<"ReadVAESMVV">;
@@ -153,11 +153,11 @@ multiclass UnsupportedSchedZvknhaOrZvknhb {
let Unsupported = true in {
defm "" : LMULWriteRes<"WriteVSHA2CHV", []>;
defm "" : LMULWriteRes<"WriteVSHA2CLV", []>;
-defm "" : LMULWriteRes<"WriteVSHA2MSV", []>;
+defm "" : LMULSEWWriteRes<"WriteVSHA2MSV", []>;
defm "" : LMULReadAdvance<"ReadVSHA2CHV", 0>;
defm "" : LMULReadAdvance<"ReadVSHA2CLV", 0>;
-defm "" : LMULReadAdvance<"ReadVSHA2MSV", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVSHA2MSV", 0>;
}
}
|
@@ -564,7 +566,9 @@ let Predicates = [HasStdExtZvkned] in { | |||
let Predicates = [HasStdExtZvknhaOrZvknhb] in { | |||
defm PseudoVSHA2CH : VPseudoVSHA2CH; | |||
defm PseudoVSHA2CL : VPseudoVSHA2CL; | |||
defm PseudoVSHA2MS : VPseudoVSHA2MS; | |||
defm PseudoVSHA2MS : VPseudoVSHA2MS<sew=32>; | |||
let Predicates = [HasStdExtZvknhb] in |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which `let` wins here: this one or the one on line 566?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this one (line 570) wins.
The vsha2ms.vv from Zvknh[ab] currently supports both SEW=32 and SEW=64. It might have different performance characteristics depending on the SEW on some processors. This patch splits these two different SEWs into their own VPseudo opcodes and scheduling classes.
This is effectively an NFC change.