diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 7be8a18dd72712..01c793afc58868 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -8644,10 +8644,7 @@ llvm::canConvertToMinOrMaxIntrinsic(ArrayRef VL) { if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) { Value *LHS, *RHS; auto CurrentPattern = matchSelectPattern(I, LHS, RHS); - if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) || - CurrentPattern.Flavor == SPF_FMINNUM || - CurrentPattern.Flavor == SPF_FMAXNUM || - !I->getType()->isIntOrIntVectorTy()) + if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor)) return false; if (SelectPattern.Flavor != SPF_UNKNOWN && SelectPattern.Flavor != CurrentPattern.Flavor) @@ -8666,6 +8663,10 @@ llvm::canConvertToMinOrMaxIntrinsic(ArrayRef VL) { return {Intrinsic::smax, AllCmpSingleUse}; case SPF_UMAX: return {Intrinsic::umax, AllCmpSingleUse}; + case SPF_FMAXNUM: + return {Intrinsic::maxnum, AllCmpSingleUse}; + case SPF_FMINNUM: + return {Intrinsic::minnum, AllCmpSingleUse}; default: llvm_unreachable("unexpected select pattern flavor"); } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 74a16d3fbcad6b..87b20e8ffa36ab 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9316,7 +9316,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, function_ref VectorCost) { // Calculate the cost of this instruction. InstructionCost ScalarCost = 0; - if (isa(VL0)) { + if (isa(VL0)) { // For some of the instructions no need to calculate cost for each // particular instruction, we can use the cost of the single // instruction x total number of scalar instructions. @@ -9637,9 +9637,27 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, ? CmpInst::BAD_FCMP_PREDICATE : CmpInst::BAD_ICMP_PREDICATE; - return TTI->getCmpSelInstrCost(E->getOpcode(), OrigScalarTy, - Builder.getInt1Ty(), CurrentPred, CostKind, - VI); + InstructionCost ScalarCost = TTI->getCmpSelInstrCost( + E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred, + CostKind, VI); + auto [MinMaxID, SelectOnly] = canConvertToMinOrMaxIntrinsic(VI); + if (MinMaxID != Intrinsic::not_intrinsic) { + IntrinsicCostAttributes CostAttrs(MinMaxID, OrigScalarTy, + {OrigScalarTy, OrigScalarTy}); + InstructionCost IntrinsicCost = + TTI->getIntrinsicInstrCost(CostAttrs, CostKind); + // If the selects are the only uses of the compares, they will be + // dead and we can adjust the cost by removing their cost. + if (SelectOnly) { + auto *CI = cast(VI->getOperand(0)); + IntrinsicCost -= TTI->getCmpSelInstrCost( + CI->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), + CI->getPredicate(), CostKind, CI); + } + ScalarCost = std::min(ScalarCost, IntrinsicCost); + } + + return ScalarCost; }; auto GetVectorCost = [&](InstructionCost CommonCost) { auto *MaskTy = getWidenedType(Builder.getInt1Ty(), VL.size()); @@ -9649,17 +9667,19 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, // Check if it is possible and profitable to use min/max for selects // in VL. // - auto IntrinsicAndUse = canConvertToMinOrMaxIntrinsic(VL); - if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) { - IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy, - {VecTy, VecTy}); + auto [MinMaxID, SelectOnly] = canConvertToMinOrMaxIntrinsic(VL); + if (MinMaxID != Intrinsic::not_intrinsic) { + IntrinsicCostAttributes CostAttrs(MinMaxID, VecTy, {VecTy, VecTy}); InstructionCost IntrinsicCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind); // If the selects are the only uses of the compares, they will be // dead and we can adjust the cost by removing their cost. - if (IntrinsicAndUse.second) - IntrinsicCost -= TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, + if (SelectOnly) { + auto *CI = + cast(cast(VL.front())->getOperand(0)); + IntrinsicCost -= TTI->getCmpSelInstrCost(CI->getOpcode(), VecTy, MaskTy, VecPred, CostKind); + } VecCost = std::min(VecCost, IntrinsicCost); } return VecCost + CommonCost; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll index 283cc07dfb9b96..e60e356e5cd819 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll @@ -75,48 +75,64 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef ; CHECK-NEXT: [[TMP4TT_0_LCSSA:%.*]] = phi <2 x i64> [ zeroinitializer, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_END_LOOPEXIT]] ] ; CHECK-NEXT: [[PB_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PB]], [[ENTRY]] ], [ [[SCEVGEP311]], [[WHILE_END_LOOPEXIT]] ] ; CHECK-NEXT: [[PA_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PA]], [[ENTRY]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP4FT_0_LCSSA]], <2 x i64> [[TMP4TF_0_LCSSA]], <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP4FF_0_LCSSA]], <2 x i64> [[TMP4TT_0_LCSSA]], <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP4FT_0_LCSSA]], <2 x i64> [[TMP4TF_0_LCSSA]], <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP4FF_0_LCSSA]], <2 x i64> [[TMP4TT_0_LCSSA]], <2 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i64> [[TMP12]] to <4 x i32> -; CHECK-NEXT: [[TMP57:%.*]] = trunc <4 x i64> [[TMP15]] to <4 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], [[TMP57]] +; CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP4TT_0_LCSSA]], i64 0 +; CHECK-NEXT: [[VGETQ_LANE45:%.*]] = extractelement <2 x i64> [[TMP4TT_0_LCSSA]], i64 1 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[VGETQ_LANE]], [[VGETQ_LANE45]] +; CHECK-NEXT: [[CONV48:%.*]] = trunc i64 [[ADD]] to i32 +; CHECK-NEXT: [[VGETQ_LANE51:%.*]] = extractelement <2 x i64> [[TMP4FF_0_LCSSA]], i64 0 +; CHECK-NEXT: [[VGETQ_LANE55:%.*]] = extractelement <2 x i64> [[TMP4FF_0_LCSSA]], i64 1 +; CHECK-NEXT: [[ADD57:%.*]] = add i64 [[VGETQ_LANE51]], [[VGETQ_LANE55]] +; CHECK-NEXT: [[CONV60:%.*]] = trunc i64 [[ADD57]] to i32 +; CHECK-NEXT: [[VGETQ_LANE63:%.*]] = extractelement <2 x i64> [[TMP4TF_0_LCSSA]], i64 0 +; CHECK-NEXT: [[VGETQ_LANE67:%.*]] = extractelement <2 x i64> [[TMP4TF_0_LCSSA]], i64 1 +; CHECK-NEXT: [[ADD69:%.*]] = add i64 [[VGETQ_LANE63]], [[VGETQ_LANE67]] +; CHECK-NEXT: [[CONV72:%.*]] = trunc i64 [[ADD69]] to i32 +; CHECK-NEXT: [[VGETQ_LANE75:%.*]] = extractelement <2 x i64> [[TMP4FT_0_LCSSA]], i64 0 +; CHECK-NEXT: [[VGETQ_LANE79:%.*]] = extractelement <2 x i64> [[TMP4FT_0_LCSSA]], i64 1 +; CHECK-NEXT: [[ADD81:%.*]] = add i64 [[VGETQ_LANE75]], [[VGETQ_LANE79]] +; CHECK-NEXT: [[CONV84:%.*]] = trunc i64 [[ADD81]] to i32 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[NUMBEROFBOOLS]], 127 ; CHECK-NEXT: [[CMP86284:%.*]] = icmp ugt i32 [[AND]], 31 ; CHECK-NEXT: br i1 [[CMP86284]], label [[WHILE_BODY88:%.*]], label [[WHILE_END122:%.*]] ; CHECK: while.body88: ; CHECK-NEXT: [[PA_ADDR_1291:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_END121:%.*]] ], [ [[PA_ADDR_0_LCSSA]], [[WHILE_END]] ] ; CHECK-NEXT: [[PB_ADDR_1290:%.*]] = phi ptr [ [[INCDEC_PTR89:%.*]], [[WHILE_END121]] ], [ [[PB_ADDR_0_LCSSA]], [[WHILE_END]] ] +; CHECK-NEXT: [[_CTT_0289:%.*]] = phi i32 [ [[ADD99:%.*]], [[WHILE_END121]] ], [ [[CONV48]], [[WHILE_END]] ] +; CHECK-NEXT: [[_CFF_0288:%.*]] = phi i32 [ [[ADD106:%.*]], [[WHILE_END121]] ], [ [[CONV60]], [[WHILE_END]] ] +; CHECK-NEXT: [[_CTF_0287:%.*]] = phi i32 [ [[ADD113:%.*]], [[WHILE_END121]] ], [ [[CONV72]], [[WHILE_END]] ] +; CHECK-NEXT: [[_CFT_0286:%.*]] = phi i32 [ [[ADD120:%.*]], [[WHILE_END121]] ], [ [[CONV84]], [[WHILE_END]] ] ; CHECK-NEXT: [[NBBOOLBLOCK_1285:%.*]] = phi i32 [ [[SUB:%.*]], [[WHILE_END121]] ], [ [[AND]], [[WHILE_END]] ] -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP34:%.*]], [[WHILE_END121]] ], [ [[TMP17]], [[WHILE_END]] ] -; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[PA_ADDR_1291]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[PB_ADDR_1290]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[PA_ADDR_1291]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[PB_ADDR_1290]], align 4 ; CHECK-NEXT: br label [[WHILE_BODY93:%.*]] ; CHECK: while.body93: -; CHECK-NEXT: [[A_0279:%.*]] = phi i32 [ [[TMP19]], [[WHILE_BODY88]] ], [ [[SHR96:%.*]], [[WHILE_BODY93]] ] -; CHECK-NEXT: [[B_0278:%.*]] = phi i32 [ [[TMP20]], [[WHILE_BODY88]] ], [ [[SHR97:%.*]], [[WHILE_BODY93]] ] +; CHECK-NEXT: [[_CTT_1283:%.*]] = phi i32 [ [[_CTT_0289]], [[WHILE_BODY88]] ], [ [[ADD99]], [[WHILE_BODY93]] ] +; CHECK-NEXT: [[_CFF_1282:%.*]] = phi i32 [ [[_CFF_0288]], [[WHILE_BODY88]] ], [ [[ADD106]], [[WHILE_BODY93]] ] +; CHECK-NEXT: [[_CTF_1281:%.*]] = phi i32 [ [[_CTF_0287]], [[WHILE_BODY88]] ], [ [[ADD113]], [[WHILE_BODY93]] ] +; CHECK-NEXT: [[_CFT_1280:%.*]] = phi i32 [ [[_CFT_0286]], [[WHILE_BODY88]] ], [ [[ADD120]], [[WHILE_BODY93]] ] +; CHECK-NEXT: [[A_0279:%.*]] = phi i32 [ [[TMP10]], [[WHILE_BODY88]] ], [ [[SHR96:%.*]], [[WHILE_BODY93]] ] +; CHECK-NEXT: [[B_0278:%.*]] = phi i32 [ [[TMP11]], [[WHILE_BODY88]] ], [ [[SHR97:%.*]], [[WHILE_BODY93]] ] ; CHECK-NEXT: [[SHIFT_0277:%.*]] = phi i32 [ 0, [[WHILE_BODY88]] ], [ [[INC:%.*]], [[WHILE_BODY93]] ] -; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i32> [ [[TMP18]], [[WHILE_BODY88]] ], [ [[TMP34]], [[WHILE_BODY93]] ] ; CHECK-NEXT: [[AND94:%.*]] = and i32 [[A_0279]], 1 ; CHECK-NEXT: [[AND95:%.*]] = and i32 [[B_0278]], 1 ; CHECK-NEXT: [[SHR96]] = lshr i32 [[A_0279]], 1 ; CHECK-NEXT: [[SHR97]] = lshr i32 [[B_0278]], 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND94]], 0 ; CHECK-NEXT: [[TOBOOL98:%.*]] = icmp ne i32 [[AND95]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL98]], i1 false +; CHECK-NEXT: [[LAND_EXT:%.*]] = zext i1 [[TMP12]] to i32 +; CHECK-NEXT: [[ADD99]] = add i32 [[_CTT_1283]], [[LAND_EXT]] ; CHECK-NEXT: [[TOBOOL100:%.*]] = icmp eq i32 [[AND94]], 0 ; CHECK-NEXT: [[TOBOOL103:%.*]] = icmp eq i32 [[AND95]], 0 -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL100]], i32 0 -; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i1> [[TMP22]], i1 [[TOBOOL]], i32 1 -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL98]], i32 0 -; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i1> [[TMP25]], i1 [[TOBOOL103]], i32 1 -; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i1> [[TMP27]], <4 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP26]], <4 x i1> [[TMP31]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i1> [[TMP32]] to <4 x i32> -; CHECK-NEXT: [[TMP34]] = add <4 x i32> [[TMP21]], [[TMP33]] +; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TOBOOL100]], i1 [[TOBOOL103]], i1 false +; CHECK-NEXT: [[LAND_EXT105:%.*]] = zext i1 [[TMP13]] to i32 +; CHECK-NEXT: [[ADD106]] = add i32 [[_CFF_1282]], [[LAND_EXT105]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL103]], i1 false +; CHECK-NEXT: [[LAND_EXT112:%.*]] = zext i1 [[TMP14]] to i32 +; CHECK-NEXT: [[ADD113]] = add i32 [[_CTF_1281]], [[LAND_EXT112]] +; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TOBOOL100]], i1 [[TOBOOL98]], i1 false +; CHECK-NEXT: [[LAND_EXT119:%.*]] = zext i1 [[TMP15]] to i32 +; CHECK-NEXT: [[ADD120]] = add i32 [[_CFT_1280]], [[LAND_EXT119]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[SHIFT_0277]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], 32 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[WHILE_END121]], label [[WHILE_BODY93]] @@ -128,53 +144,61 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef ; CHECK-NEXT: br i1 [[CMP86]], label [[WHILE_BODY88]], label [[WHILE_END122]] ; CHECK: while.end122: ; CHECK-NEXT: [[NBBOOLBLOCK_1_LCSSA:%.*]] = phi i32 [ [[AND]], [[WHILE_END]] ], [ [[SUB]], [[WHILE_END121]] ] +; CHECK-NEXT: [[_CFT_0_LCSSA:%.*]] = phi i32 [ [[CONV84]], [[WHILE_END]] ], [ [[ADD120]], [[WHILE_END121]] ] +; CHECK-NEXT: [[_CTF_0_LCSSA:%.*]] = phi i32 [ [[CONV72]], [[WHILE_END]] ], [ [[ADD113]], [[WHILE_END121]] ] +; CHECK-NEXT: [[_CFF_0_LCSSA:%.*]] = phi i32 [ [[CONV60]], [[WHILE_END]] ], [ [[ADD106]], [[WHILE_END121]] ] +; CHECK-NEXT: [[_CTT_0_LCSSA:%.*]] = phi i32 [ [[CONV48]], [[WHILE_END]] ], [ [[ADD99]], [[WHILE_END121]] ] ; CHECK-NEXT: [[PB_ADDR_1_LCSSA:%.*]] = phi ptr [ [[PB_ADDR_0_LCSSA]], [[WHILE_END]] ], [ [[INCDEC_PTR89]], [[WHILE_END121]] ] ; CHECK-NEXT: [[PA_ADDR_1_LCSSA:%.*]] = phi ptr [ [[PA_ADDR_0_LCSSA]], [[WHILE_END]] ], [ [[INCDEC_PTR]], [[WHILE_END121]] ] -; CHECK-NEXT: [[TMP35:%.*]] = phi <4 x i32> [ [[TMP17]], [[WHILE_END]] ], [ [[TMP34]], [[WHILE_END121]] ] ; CHECK-NEXT: [[CMP130_NOT299:%.*]] = icmp eq i32 [[NBBOOLBLOCK_1_LCSSA]], 0 ; CHECK-NEXT: br i1 [[CMP130_NOT299]], label [[WHILE_END166:%.*]], label [[WHILE_BODY132_PREHEADER:%.*]] ; CHECK: while.body132.preheader: -; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[PB_ADDR_1_LCSSA]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[PB_ADDR_1_LCSSA]], align 4 ; CHECK-NEXT: [[SUB125:%.*]] = sub nuw nsw i32 32, [[NBBOOLBLOCK_1_LCSSA]] -; CHECK-NEXT: [[SHR128:%.*]] = lshr i32 [[TMP36]], [[SUB125]] -; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[PA_ADDR_1_LCSSA]], align 4 -; CHECK-NEXT: [[SHR126:%.*]] = lshr i32 [[TMP37]], [[SUB125]] +; CHECK-NEXT: [[SHR128:%.*]] = lshr i32 [[TMP16]], [[SUB125]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[PA_ADDR_1_LCSSA]], align 4 +; CHECK-NEXT: [[SHR126:%.*]] = lshr i32 [[TMP17]], [[SUB125]] ; CHECK-NEXT: br label [[WHILE_BODY132:%.*]] ; CHECK: while.body132: +; CHECK-NEXT: [[_CTT_2306:%.*]] = phi i32 [ [[ADD142:%.*]], [[WHILE_BODY132]] ], [ [[_CTT_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ] +; CHECK-NEXT: [[_CFF_2305:%.*]] = phi i32 [ [[ADD150:%.*]], [[WHILE_BODY132]] ], [ [[_CFF_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ] +; CHECK-NEXT: [[_CTF_2304:%.*]] = phi i32 [ [[ADD157:%.*]], [[WHILE_BODY132]] ], [ [[_CTF_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ] +; CHECK-NEXT: [[_CFT_2303:%.*]] = phi i32 [ [[ADD164:%.*]], [[WHILE_BODY132]] ], [ [[_CFT_0_LCSSA]], [[WHILE_BODY132_PREHEADER]] ] ; CHECK-NEXT: [[NBBOOLBLOCK_2302:%.*]] = phi i32 [ [[DEC165:%.*]], [[WHILE_BODY132]] ], [ [[NBBOOLBLOCK_1_LCSSA]], [[WHILE_BODY132_PREHEADER]] ] ; CHECK-NEXT: [[A_1301:%.*]] = phi i32 [ [[SHR135:%.*]], [[WHILE_BODY132]] ], [ [[SHR126]], [[WHILE_BODY132_PREHEADER]] ] ; CHECK-NEXT: [[B_1300:%.*]] = phi i32 [ [[SHR136:%.*]], [[WHILE_BODY132]] ], [ [[SHR128]], [[WHILE_BODY132_PREHEADER]] ] -; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP51:%.*]], [[WHILE_BODY132]] ], [ [[TMP35]], [[WHILE_BODY132_PREHEADER]] ] ; CHECK-NEXT: [[AND133:%.*]] = and i32 [[A_1301]], 1 ; CHECK-NEXT: [[AND134:%.*]] = and i32 [[B_1300]], 1 ; CHECK-NEXT: [[SHR135]] = lshr i32 [[A_1301]], 1 ; CHECK-NEXT: [[SHR136]] = lshr i32 [[B_1300]], 1 ; CHECK-NEXT: [[TOBOOL137:%.*]] = icmp ne i32 [[AND133]], 0 ; CHECK-NEXT: [[TOBOOL139:%.*]] = icmp ne i32 [[AND134]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TOBOOL137]], i1 [[TOBOOL139]], i1 false +; CHECK-NEXT: [[LAND_EXT141:%.*]] = zext i1 [[TMP18]] to i32 +; CHECK-NEXT: [[ADD142]] = add i32 [[_CTT_2306]], [[LAND_EXT141]] ; CHECK-NEXT: [[TOBOOL144:%.*]] = icmp eq i32 [[AND133]], 0 ; CHECK-NEXT: [[TOBOOL147:%.*]] = icmp eq i32 [[AND134]], 0 -; CHECK-NEXT: [[TMP40:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL144]], i32 0 -; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i1> [[TMP40]], i1 [[TOBOOL137]], i32 1 -; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i1> [[TMP41]], <4 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL139]], i32 0 -; CHECK-NEXT: [[TMP39:%.*]] = insertelement <4 x i1> [[TMP42]], i1 [[TOBOOL147]], i32 1 -; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <4 x i1> [[TMP39]], <4 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP43]], <4 x i1> [[TMP48]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP50:%.*]] = zext <4 x i1> [[TMP49]] to <4 x i32> -; CHECK-NEXT: [[TMP51]] = add <4 x i32> [[TMP38]], [[TMP50]] +; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TOBOOL144]], i1 [[TOBOOL147]], i1 false +; CHECK-NEXT: [[LAND_EXT149:%.*]] = zext i1 [[TMP19]] to i32 +; CHECK-NEXT: [[ADD150]] = add i32 [[_CFF_2305]], [[LAND_EXT149]] +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TOBOOL137]], i1 [[TOBOOL147]], i1 false +; CHECK-NEXT: [[LAND_EXT156:%.*]] = zext i1 [[TMP20]] to i32 +; CHECK-NEXT: [[ADD157]] = add i32 [[_CTF_2304]], [[LAND_EXT156]] +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TOBOOL144]], i1 [[TOBOOL139]], i1 false +; CHECK-NEXT: [[LAND_EXT163:%.*]] = zext i1 [[TMP21]] to i32 +; CHECK-NEXT: [[ADD164]] = add i32 [[_CFT_2303]], [[LAND_EXT163]] ; CHECK-NEXT: [[DEC165]] = add nsw i32 [[NBBOOLBLOCK_2302]], -1 ; CHECK-NEXT: [[CMP130_NOT:%.*]] = icmp eq i32 [[DEC165]], 0 ; CHECK-NEXT: br i1 [[CMP130_NOT]], label [[WHILE_END166]], label [[WHILE_BODY132]] ; CHECK: while.end166: -; CHECK-NEXT: [[TMP52:%.*]] = phi <4 x i32> [ [[TMP35]], [[WHILE_END122]] ], [ [[TMP51]], [[WHILE_BODY132]] ] -; CHECK-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[TMP52]], i32 3 -; CHECK-NEXT: store i32 [[TMP53]], ptr [[CTT:%.*]], align 4 -; CHECK-NEXT: [[TMP54:%.*]] = extractelement <4 x i32> [[TMP52]], i32 2 -; CHECK-NEXT: store i32 [[TMP54]], ptr [[CFF:%.*]], align 4 -; CHECK-NEXT: [[TMP55:%.*]] = extractelement <4 x i32> [[TMP52]], i32 1 -; CHECK-NEXT: store i32 [[TMP55]], ptr [[CTF:%.*]], align 4 -; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i32> [[TMP52]], i32 0 -; CHECK-NEXT: store i32 [[TMP56]], ptr [[CFT:%.*]], align 4 +; CHECK-NEXT: [[_CFT_2_LCSSA:%.*]] = phi i32 [ [[_CFT_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD164]], [[WHILE_BODY132]] ] +; CHECK-NEXT: [[_CTF_2_LCSSA:%.*]] = phi i32 [ [[_CTF_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD157]], [[WHILE_BODY132]] ] +; CHECK-NEXT: [[_CFF_2_LCSSA:%.*]] = phi i32 [ [[_CFF_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD150]], [[WHILE_BODY132]] ] +; CHECK-NEXT: [[_CTT_2_LCSSA:%.*]] = phi i32 [ [[_CTT_0_LCSSA]], [[WHILE_END122]] ], [ [[ADD142]], [[WHILE_BODY132]] ] +; CHECK-NEXT: store i32 [[_CTT_2_LCSSA]], ptr [[CTT:%.*]], align 4 +; CHECK-NEXT: store i32 [[_CFF_2_LCSSA]], ptr [[CFF:%.*]], align 4 +; CHECK-NEXT: store i32 [[_CTF_2_LCSSA]], ptr [[CTF:%.*]], align 4 +; CHECK-NEXT: store i32 [[_CFT_2_LCSSA]], ptr [[CFT:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks_cmp_sel_min_max.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks_cmp_sel_min_max.ll index a009841de6e65f..644d645b9dc88d 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks_cmp_sel_min_max.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks_cmp_sel_min_max.ll @@ -8,7 +8,7 @@ ; YAML-NEXT: Function: min_double ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' -; YAML-NEXT: - Cost: '-3' +; YAML-NEXT: - Cost: '-1' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '6' define i32 @min_double(ptr noalias nocapture %A, ptr noalias nocapture %B) { @@ -76,7 +76,7 @@ entry: ; YAML-NEXT: Function: max_double ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' -; YAML-NEXT: - Cost: '-3' +; YAML-NEXT: - Cost: '-1' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '6' define i32 @max_double(ptr noalias nocapture %A, ptr noalias nocapture %B) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll index 40ca0150d8e744..0b9ed47ce0f178 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll @@ -13,28 +13,30 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 { ; CHECK: if.end: ; CHECK-NEXT: [[SUB14:%.*]] = sub nsw i32 [[Y_POS:%.*]], undef ; CHECK-NEXT: [[SHR15:%.*]] = ashr i32 [[SUB14]], 2 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[SHR15]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 undef, i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 undef, i32 2 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 undef, i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], undef -; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> undef -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP9]], i32 0 -; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP9]], i32 2 -; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP15]] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 -; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[TMP17]] +; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp sgt i32 [[SHR15]], 0 +; CHECK-NEXT: [[COND_I_I:%.*]] = select i1 [[CMP_I_I]], i32 [[SHR15]], i32 0 +; CHECK-NEXT: [[CMP_I4_I:%.*]] = icmp slt i32 [[COND_I_I]], undef +; CHECK-NEXT: [[COND_I5_I:%.*]] = select i1 [[CMP_I4_I]], i32 [[COND_I_I]], i32 undef +; CHECK-NEXT: [[IDXPROM30:%.*]] = sext i32 [[COND_I5_I]] to i64 +; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30]] +; CHECK-NEXT: [[CMP_I_I_1:%.*]] = icmp sgt i32 [[SUB14]], -1 +; CHECK-NEXT: [[COND_I_I_1:%.*]] = select i1 [[CMP_I_I_1]], i32 undef, i32 0 +; CHECK-NEXT: [[CMP_I4_I_1:%.*]] = icmp slt i32 [[COND_I_I_1]], undef +; CHECK-NEXT: [[COND_I5_I_1:%.*]] = select i1 [[CMP_I4_I_1]], i32 [[COND_I_I_1]], i32 undef +; CHECK-NEXT: [[IDXPROM30_1:%.*]] = sext i32 [[COND_I5_I_1]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_1]] +; CHECK-NEXT: [[CMP_I_I_2:%.*]] = icmp sgt i32 [[SUB14]], -5 +; CHECK-NEXT: [[COND_I_I_2:%.*]] = select i1 [[CMP_I_I_2]], i32 undef, i32 0 +; CHECK-NEXT: [[CMP_I4_I_2:%.*]] = icmp slt i32 [[COND_I_I_2]], undef +; CHECK-NEXT: [[COND_I5_I_2:%.*]] = select i1 [[CMP_I4_I_2]], i32 [[COND_I_I_2]], i32 undef +; CHECK-NEXT: [[IDXPROM30_2:%.*]] = sext i32 [[COND_I5_I_2]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_2]] +; CHECK-NEXT: [[CMP_I_I_3:%.*]] = icmp sgt i32 [[SUB14]], -9 +; CHECK-NEXT: [[COND_I_I_3:%.*]] = select i1 [[CMP_I_I_3]], i32 undef, i32 0 +; CHECK-NEXT: [[CMP_I4_I_3:%.*]] = icmp slt i32 [[COND_I_I_3]], undef +; CHECK-NEXT: [[COND_I5_I_3:%.*]] = select i1 [[CMP_I4_I_3]], i32 [[COND_I_I_3]], i32 undef +; CHECK-NEXT: [[IDXPROM30_3:%.*]] = sext i32 [[COND_I5_I_3]] to i64 +; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_3]] ; CHECK-NEXT: unreachable ; entry: