-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LegalizeTypes] Use VP_AND and VP_SHL/VP_SRA to promote operands of VP arithmetic. #92799
Conversation
Use it for 2 places in LegalizeIntegerTypes that created a VP_AND.
…P arithmetic. This adds VPSExtPromotedInteger and VPZExtPromotedInteger and uses them to promote many arithmetic operations. VPSExtPromotedInteger uses a shift pair because we don't have VP_SIGN_EXTEND_INREG yet.
@llvm/pr-subscribers-llvm-selectiondag Author: Craig Topper (topperc) Changes: This adds VPSExtPromotedInteger and VPZExtPromotedInteger and uses them to promote many arithmetic operations. VPSExtPromotedInteger uses a shift pair because we don't have VP_SIGN_EXTEND_INREG yet. Patch is 42.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92799.diff 29 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index ed6962685f7b0..96a6270690468 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -991,6 +991,11 @@ class SelectionDAG {
/// value assuming it was the smaller SrcTy value.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
+ /// Return the expression required to zero extend the Op
+ /// value assuming it was the smaller SrcTy value.
+ SDValue getVPZeroExtendInReg(SDValue Op, SDValue Mask, SDValue EVL,
+ const SDLoc &DL, EVT VT);
+
/// Convert Op, which must be of integer type, to the integer type VT, by
/// either truncating it or performing either zero or sign extension as
/// appropriate extension for the pointer's semantics.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 7d3be72995239..08508425a2cc8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -646,18 +646,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
}
}
- // Zero extend to the promoted type and do the count there.
- SDValue Op = ZExtPromotedInteger(N->getOperand(0));
-
// Subtract off the extra leading bits in the bigger type.
SDValue ExtractLeadingBits = DAG.getConstant(
NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT);
- if (!N->isVPOpcode())
+ if (!N->isVPOpcode()) {
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::SUB, dl, NVT,
DAG.getNode(N->getOpcode(), dl, NVT, Op),
ExtractLeadingBits);
+ }
+
SDValue Mask = N->getOperand(1);
SDValue EVL = N->getOperand(2);
+ // Zero extend to the promoted type and do the count there.
+ SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL);
return DAG.getNode(ISD::VP_SUB, dl, NVT,
DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL),
ExtractLeadingBits, Mask, EVL);
@@ -681,11 +684,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
}
// Zero extend to the promoted type and do the count or parity there.
- SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- if (!N->isVPOpcode())
+ if (!N->isVPOpcode()) {
+ SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
- return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
- N->getOperand(1), N->getOperand(2));
+ }
+
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ SDValue Op = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, Mask,
+ EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
@@ -1335,12 +1343,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FFREXP(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
- if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
- RHS = ZExtPromotedInteger(RHS);
- if (N->getOpcode() != ISD::VP_SHL)
+ if (N->getOpcode() != ISD::VP_SHL) {
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
+
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ }
+
+ SDValue Mask = N->getOperand(2);
+ SDValue EVL = N->getOperand(3);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = VPZExtPromotedInteger(RHS, Mask, EVL);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
- N->getOperand(2), N->getOperand(3));
+ Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
@@ -1364,27 +1379,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
- // Sign extend the input.
- SDValue LHS = SExtPromotedInteger(N->getOperand(0));
- SDValue RHS = SExtPromotedInteger(N->getOperand(1));
- if (N->getNumOperands() == 2)
+ if (N->getNumOperands() == 2) {
+ // Sign extend the input.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ }
assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
assert(N->isVPOpcode() && "Expected VP opcode");
+ SDValue Mask = N->getOperand(2);
+ SDValue EVL = N->getOperand(3);
+ // Sign extend the input.
+ SDValue LHS = VPSExtPromotedInteger(N->getOperand(0), Mask, EVL);
+ SDValue RHS = VPSExtPromotedInteger(N->getOperand(1), Mask, EVL);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
- N->getOperand(2), N->getOperand(3));
+ Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
- // Zero extend the input.
- SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
- SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
- if (N->getNumOperands() == 2)
+ if (N->getNumOperands() == 2) {
+ // Zero extend the input.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ }
assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
assert(N->isVPOpcode() && "Expected VP opcode");
+ // Zero extend the input.
+ SDValue Mask = N->getOperand(2);
+ SDValue EVL = N->getOperand(3);
+ SDValue LHS = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL);
+ SDValue RHS = VPZExtPromotedInteger(N->getOperand(1), Mask, EVL);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
- N->getOperand(2), N->getOperand(3));
+ Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
@@ -1400,27 +1427,43 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
- // The input value must be properly sign extended.
- SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
- if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
- RHS = ZExtPromotedInteger(RHS);
- if (N->getOpcode() != ISD::VP_ASHR)
+ if (N->getOpcode() != ISD::VP_ASHR) {
+ // The input value must be properly sign extended.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ }
+
+ SDValue Mask = N->getOperand(2);
+ SDValue EVL = N->getOperand(3);
+ // The input value must be properly sign extended.
+ SDValue LHS = VPSExtPromotedInteger(N->getOperand(0), Mask, EVL);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = VPZExtPromotedInteger(RHS, Mask, EVL);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
- N->getOperand(2), N->getOperand(3));
+ Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
- // The input value must be properly zero extended.
- SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
- if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
- RHS = ZExtPromotedInteger(RHS);
- if (N->getOpcode() != ISD::VP_LSHR)
+ if (N->getOpcode() != ISD::VP_LSHR) {
+ // The input value must be properly zero extended.
+ SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = ZExtPromotedInteger(RHS);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ }
+
+ SDValue Mask = N->getOperand(2);
+ SDValue EVL = N->getOperand(3);
+ // The input value must be properly zero extended.
+ SDValue LHS = VPZExtPromotedInteger(N->getOperand(0), Mask, EVL);
+ if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
+ RHS = VPZExtPromotedInteger(RHS, Mask, EVL);
return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
- N->getOperand(2), N->getOperand(3));
+ Mask, EVL);
}
SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
@@ -1487,7 +1530,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) {
SDValue Mask = N->getOperand(3);
SDValue EVL = N->getOperand(4);
if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger)
- Amt = ZExtPromotedInteger(Amt);
+ Amt = VPZExtPromotedInteger(Amt, Mask, EVL);
EVT AmtVT = Amt.getValueType();
SDLoc DL(N);
@@ -1511,10 +1554,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) {
!TLI.isOperationLegalOrCustom(Opcode, VT)) {
SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
Hi = DAG.getNode(ISD::VP_SHL, DL, VT, Hi, HiShift, Mask, EVL);
- APInt Imm = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
- OldVT.getScalarSizeInBits());
- Lo = DAG.getNode(ISD::VP_AND, DL, VT, Lo, DAG.getConstant(Imm, DL, VT),
- Mask, EVL);
+ Lo = DAG.getVPZeroExtendInReg(Lo, Mask, EVL, DL, OldVT);
SDValue Res = DAG.getNode(ISD::VP_OR, DL, VT, Hi, Lo, Mask, EVL);
Res = DAG.getNode(IsFSHR ? ISD::VP_LSHR : ISD::VP_SHL, DL, VT, Res, Amt,
Mask, EVL);
@@ -2377,10 +2417,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_ZERO_EXTEND(SDNode *N) {
// FIXME: There is no VP_ANY_EXTEND yet.
Op = DAG.getNode(ISD::VP_ZERO_EXTEND, dl, VT, Op, N->getOperand(1),
N->getOperand(2));
- APInt Imm = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
- N->getOperand(0).getScalarValueSizeInBits());
- return DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(Imm, dl, VT),
- N->getOperand(1), N->getOperand(2));
+ return DAG.getVPZeroExtendInReg(Op, N->getOperand(1), N->getOperand(2), dl,
+ N->getOperand(0).getValueType());
}
SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d925089d5689f..b795e357e82d4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -275,6 +275,27 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
return DAG.getZeroExtendInReg(Op, dl, OldVT);
}
+ /// Get a promoted operand and sign extend it to the final size.
+ SDValue VPSExtPromotedInteger(SDValue Op, SDValue Mask, SDValue EVL) {
+ EVT OldVT = Op.getValueType();
+ SDLoc dl(Op);
+ Op = GetPromotedInteger(Op);
+ // FIXME: Add VP_SIGN_EXTEND_INREG.
+ EVT VT = Op.getValueType();
+ unsigned BitsDiff = VT.getScalarSizeInBits() - OldVT.getScalarSizeInBits();
+ SDValue ShiftCst = DAG.getShiftAmountConstant(BitsDiff, VT, dl);
+ SDValue Shl = DAG.getNode(ISD::VP_SHL, dl, VT, Op, ShiftCst, Mask, EVL);
+ return DAG.getNode(ISD::VP_ASHR, dl, VT, Shl, ShiftCst, Mask, EVL);
+ }
+
+ /// Get a promoted operand and zero extend it to the final size.
+ SDValue VPZExtPromotedInteger(SDValue Op, SDValue Mask, SDValue EVL) {
+ EVT OldVT = Op.getValueType();
+ SDLoc dl(Op);
+ Op = GetPromotedInteger(Op);
+ return DAG.getVPZeroExtendInReg(Op, Mask, EVL, dl, OldVT);
+ }
+
// Promote the given operand V (vector or scalar) according to N's specific
// reduction kind. N must be an integer VECREDUCE_* or VP_REDUCE_*. Returns
// the nominal extension opcode (ISD::(ANY|ZERO|SIGN)_EXTEND) and the
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 72685a2d77216..777bbf071732e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1540,6 +1540,25 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT));
}
+SDValue SelectionDAG::getVPZeroExtendInReg(SDValue Op, SDValue Mask,
+ SDValue EVL, const SDLoc &DL,
+ EVT VT) {
+ EVT OpVT = Op.getValueType();
+ assert(VT.isInteger() && OpVT.isInteger() &&
+ "Cannot getVPZeroExtendInReg FP types");
+ assert(VT.isVector() && OpVT.isVector() &&
+ "getVPZeroExtendInReg type and operand type should be vector!");
+ assert(VT.getVectorElementCount() == OpVT.getVectorElementCount() &&
+ "Vector element counts must match in getZeroExtendInReg");
+ assert(VT.bitsLE(OpVT) && "Not extending!");
+ if (OpVT == VT)
+ return Op;
+ APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(),
+ VT.getScalarSizeInBits());
+ return getNode(ISD::VP_AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT), Mask,
+ EVL);
+}
+
SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
// Only unsigned pointer semantics are supported right now. In the future this
// might delegate to TLI to check pointer signedness.
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index 86086f5dc88f7..3179d711f17fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -2574,9 +2574,8 @@ define <vscale x 1 x i9> @vp_ctlz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1
; CHECK-LABEL: vp_ctlz_nxv1i9:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 511
-; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
@@ -2593,9 +2592,8 @@ define <vscale x 1 x i9> @vp_ctlz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1
; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i9:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: li a1, 511
-; CHECK-ZVBB-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: li a0, 7
; CHECK-ZVBB-NEXT: vsub.vx v8, v8, a0, v0.t
@@ -2607,9 +2605,8 @@ define <vscale x 1 x i9> @vp_ctlz_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vsca
; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i9:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 511
-; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
@@ -2624,9 +2621,8 @@ define <vscale x 1 x i9> @vp_ctlz_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vsca
; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i9:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: li a1, 511
-; CHECK-ZVBB-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: li a0, 7
; CHECK-ZVBB-NEXT: vsub.vx v8, v8, a0, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
index 883f68aec1f42..bb4ae7717e4f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -2546,9 +2546,8 @@ define <vscale x 1 x i9> @vp_ctpop_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i
; CHECK-LABEL: vp_ctpop_nxv1i9:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 511
-; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: lui a0, 5
; CHECK-NEXT: addi a0, a0, 1365
@@ -2573,9 +2572,8 @@ define <vscale x 1 x i9> @vp_ctpop_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i
; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i9:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: li a1, 511
-; CHECK-ZVBB-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1
; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
; CHECK-ZVBB-NEXT: ret
%v = call <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll
index 29f8eaba90052..e3c7d02462cc7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll
@@ -9,12 +9,11 @@ declare <8 x i7> @llvm.vp.sdiv.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
define <8 x i7> @vdiv_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vdiv_vv_v8i7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vadd.vv v9, v9, v9
-; CHECK-NEXT: vsra.vi v9, v9, 1
-; CHECK-NEXT: vadd.vv v8, v8, v8
-; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t
+; CHECK-NEXT: vsra.vi v9, v9, 1, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vsra.vi v8, v8, 1, v0.t
; CHECK-NEXT: vdiv.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
%v = call <8 x i7> @llvm.vp.sdiv.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll
index 3f8eb0ff276b7..03bd85bf5e69e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll
@@ -10,10 +10,9 @@ define <8 x i7> @vdivu_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroe
; CHECK-LABEL: vdivu_vv_v8i7:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 127
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vand.vx v9, v9, a1
-; CHECK-NEXT: vand.vx v8, v8, a1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vand.vx v9, v9, a1, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a1, v0.t
; CHECK-NEXT: vdivu.vv v8, v8, v9, v0.t
; CHECK-NEXT: ret
%v = call <8 x i7> @llvm.vp.udiv.v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 %evl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
index 3db44e87109bd..56c436212ce7d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
@@ -9,12 +9,11 @@ declare <8 x i7> @llvm.vp.smax.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
define <8 x i7> @vmax_vv_v8i7(<8 x i7> %va, <8 x i7> %b, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmax_vv_v8i7:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vadd.vv v9, v9, v9
-; CHECK-NEXT: vsra.vi v9, v9, 1
-; CHECK-NEXT: vadd.vv v8, v8, v8
-; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vsll.vi v9, v9, 1, v0.t
+; CHECK-NEXT: vsra.vi v9, v9, 1, ...
[truncated]
|
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma | ||
; CHECK-NEXT: vand.vx v9, v9, a1, v0.t |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case, the use of the vp_and appears to be a semantic change. The masked off lines were previously legalized, and now aren't. Which is the expected behavior?
Setting the correctness aside for a moment, I'd expect the unmasked legalization (using a temporary register) to frequently perform better. Where's the right place to account for that? Probably not during legalization per se, but these look like perf regressions in practice to me. Do we care?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In this case, the use of the vp_and appears to be a semantic change. The masked off lines were previously legalized, and now aren't. Which is the expected behavior?
The masked off elements aren't consumed by the vp_udiv since it has the same mask so it shouldn't matter.
Setting the correctness aside for a moment, I'd expect the unmasked legalization (using a temporary register) to frequently perform better. Where's the right place to account for that? Probably not during legalization per se, but these look like perf regressions in practice to me. Do we care?
Agreed. I'm skeptical that using a mask on VP arithmetic intrinsics makes sense in general. Division is probably the most useful case to avoid undefined behavior or improve latency.
I can drop the masks if you want. My primary goal was fixing the VL toggles.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On the correctness concern, I'd gotten myself confused by assuming that the vp.udiv must have a passthru and skimming the assembly with that mindset.
I'm fine with masking in this change. If anyone cares about performance of vp code, we should probably be doing a generic combine to strip masking when it doesn't matter.
@@ -1540,6 +1540,25 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { | |||
return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT)); | |||
} | |||
|
|||
SDValue SelectionDAG::getVPZeroExtendInReg(SDValue Op, SDValue Mask, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you land the first commit in the stack (the NFC), and then rebase?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It went through separate review. I just hadn't rebased this patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma | ||
; CHECK-NEXT: vand.vx v9, v9, a1, v0.t |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
On the correctness concern, I'd gotten myself confused by assuming that the vp.udiv must have a passthru and skimming the assembly with that mindset.
I'm fine with masking in this change. If anyone cares about performance of vp code, we should probably be doing a generic combine to strip masking when it doesn't matter.
…P arithmetic. (llvm#92799) This adds VPSExtPromotedInteger and VPZExtPromotedInteger and uses them to promote many arithmetic operations. VPSExtPromotedInteger uses a shift pair because we don't have VP_SIGN_EXTEND_INREG yet.
This adds VPSExtPromotedInteger and VPZExtPromotedInteger and uses them to promote many arithmetic operations.
VPSExtPromotedInteger uses a shift pair because we don't have VP_SIGN_EXTEND_INREG yet.