diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 6011e160762202..c03c278fcebe78 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -344,16 +344,6 @@ class LoopVectorizationPlanner {
   /// A builder used to construct the current plan.
   VPBuilder Builder;
 
-  /// Computes the cost of \p Plan for vectorization factor \p VF.
-  ///
-  /// The current implementation requires access to the
-  /// LoopVectorizationLegality to handle inductions and reductions, which is
-  /// why it is kept separate from the VPlan-only cost infrastructure.
-  ///
-  /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
-  /// been retired.
-  InstructionCost cost(VPlan &Plan, ElementCount VF) const;
-
 public:
   LoopVectorizationPlanner(
       Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
@@ -375,9 +365,6 @@ class LoopVectorizationPlanner {
   /// Return the best VPlan for \p VF.
   VPlan &getBestPlanFor(ElementCount VF) const;
 
-  /// Return the most profitable plan and fix its VF to the most profitable one.
-  VPlan &getBestPlan() const;
-
   /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
   /// according to the best selected \p VF and \p UF.
   ///
@@ -456,9 +443,7 @@ class LoopVectorizationPlanner {
                                ElementCount MinVF);
 
   /// \return The most profitable vectorization factor and the cost of that VF.
-  /// This method checks every VF in \p CandidateVFs. This is now only used to
-  /// verify the decisions by the new VPlan-based cost-model and will be retired
-  /// once the VPlan-based cost-model is stabilized.
+  /// This method checks every VF in \p CandidateVFs.
   VectorizationFactor
   selectVectorizationFactor(const ElementCountSet &CandidateVFs);
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 63e7c55791f2d0..9571cfe358bf34 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -290,7 +290,7 @@ static cl::opt<unsigned> ForceTargetMaxVectorInterleaveFactor(
     cl::desc("A flag that overrides the target's max interleave factor for "
              "vectorized loops."));
 
-cl::opt<unsigned> ForceTargetInstructionCost(
+static cl::opt<unsigned> ForceTargetInstructionCost(
     "force-target-instruction-cost", cl::init(0), cl::Hidden,
     cl::desc("A flag that overrides the target's expected cost for "
              "an instruction to a single constant value. Mostly "
@@ -412,6 +412,14 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
   return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
 }
 
+/// A helper function that returns the reciprocal of the block probability of
+/// predicated blocks. If we return X, we are assuming the predicated block
+/// will execute once for every X iterations of the loop header.
+///
+/// TODO: We should use actual block probability here, if available. Currently,
+/// we always assume predicated blocks have a 50% chance of executing.
+static unsigned getReciprocalPredBlockProb() { return 2; }
+
 /// Returns "best known" trip count for the specified loop \p L as defined by
 /// the following procedure:
 ///   1) Returns exact trip count if it is known.
@@ -1613,16 +1621,6 @@ class LoopVectorizationCostModel {
   /// \p VF is the vectorization factor chosen for the original loop.
   bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
 
-  /// Return the cost of instructions in an inloop reduction pattern, if I is
-  /// part of that pattern.
-  std::optional<InstructionCost>
-  getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
-                          TTI::TargetCostKind CostKind) const;
-
-  /// Returns the execution time cost of an instruction for a given vector
-  /// width. Vector width of one means scalar.
-  VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
-
 private:
   unsigned NumPredStores = 0;
 
@@ -1648,11 +1646,21 @@ class LoopVectorizationCostModel {
   /// of elements.
   ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
 
+  /// Returns the execution time cost of an instruction for a given vector
+  /// width. Vector width of one means scalar.
+  VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
+
   /// The cost-computation logic from getInstructionCost which provides
   /// the vector type as an output parameter.
   InstructionCost getInstructionCost(Instruction *I, ElementCount VF,
                                      Type *&VectorTy);
 
+  /// Return the cost of instructions in an inloop reduction pattern, if I is
+  /// part of that pattern.
+  std::optional<InstructionCost>
+  getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy,
+                          TTI::TargetCostKind CostKind) const;
+
   /// Calculate vectorization cost of memory instruction \p I.
   InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF);
 
@@ -7289,10 +7297,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
   if (!MaxFactors.hasVector())
     return VectorizationFactor::Disabled();
 
-  // Select the optimal vectorization factor according to the legacy cost-model.
-  // This is now only used to verify the decisions by the new VPlan-based
-  // cost-model and will be retired once the VPlan-based cost-model is
-  // stabilized.
+  // Select the optimal vectorization factor.
   VectorizationFactor VF = selectVectorizationFactor(VFCandidates);
   assert((VF.Width.isScalar() || VF.ScalarCost > 0) &&
          "when vectorizing, the scalar cost must be non-zero.");
   if (!hasPlanWithVF(VF.Width)) {
@@ -7303,189 +7308,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
   return VF;
 }
 
-InstructionCost VPCostContext::getLegacyCost(Instruction *UI,
-                                             ElementCount VF) const {
-  return CM.getInstructionCost(UI, VF).first;
-}
-
-bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const {
-  return (IsVector && CM.VecValuesToIgnore.contains(UI)) ||
-         SkipCostComputation.contains(UI);
-}
-
-InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
-                                               ElementCount VF) const {
-  InstructionCost Cost = 0;
-  LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext();
-  VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM);
-
-  // Cost modeling for inductions is inaccurate in the legacy cost model
-  // compared to the recipes that are generated. To match here initially during
-  // VPlan cost model bring up directly use the induction costs from the legacy
-  // cost model. Note that we do this as pre-processing; the VPlan may not have
-  // any recipes associated with the original induction increment instruction
-  // and may replace truncates with VPWidenIntOrFpInductionRecipe. We precompute
-  // the cost of induction phis and increments (both that are represented by
-  // recipes and those that are not), to avoid distinguishing between them here,
-  // and skip all recipes that represent induction phis and increments (the
-  // former case) later on, if they exist, to avoid counting them twice.
-  // Similarly we pre-compute the cost of any optimized truncates.
-  // TODO: Switch to more accurate costing based on VPlan.
-  for (const auto &[IV, IndDesc] : Legal->getInductionVars()) {
-    Instruction *IVInc = cast<Instruction>(
-        IV->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
-    SmallVector<Instruction *> IVInsts = {IV, IVInc};
-    for (User *U : IV->users()) {
-      auto *CI = cast<Instruction>(U);
-      if (!CostCtx.CM.isOptimizableIVTruncate(CI, VF))
-        continue;
-      IVInsts.push_back(CI);
-    }
-    for (Instruction *IVInst : IVInsts) {
-      if (!CostCtx.SkipCostComputation.insert(IVInst).second)
-        continue;
-      InstructionCost InductionCost = CostCtx.getLegacyCost(IVInst, VF);
-      LLVM_DEBUG({
-        dbgs() << "Cost of " << InductionCost << " for VF " << VF
-               << ": induction instruction " << *IVInst << "\n";
-      });
-      Cost += InductionCost;
-    }
-  }
-
-  /// Compute the cost of all exiting conditions of the loop using the legacy
-  /// cost model. This is to match the legacy behavior, which adds the cost of
-  /// all exit conditions. Note that this over-estimates the cost, as there will
-  /// be a single condition to control the vector loop.
-  SmallVector<BasicBlock *> Exiting;
-  CM.TheLoop->getExitingBlocks(Exiting);
-  SetVector<Instruction *> ExitInstrs;
-  // Collect all exit conditions.
-  for (BasicBlock *EB : Exiting) {
-    auto *Term = dyn_cast<BranchInst>(EB->getTerminator());
-    if (!Term)
-      continue;
-    if (auto *CondI = dyn_cast<Instruction>(Term->getOperand(0))) {
-      ExitInstrs.insert(CondI);
-    }
-  }
-  // Compute the cost of all instructions only feeding the exit conditions.
-  for (unsigned I = 0; I != ExitInstrs.size(); ++I) {
-    Instruction *CondI = ExitInstrs[I];
-    if (!OrigLoop->contains(CondI) ||
-        !CostCtx.SkipCostComputation.insert(CondI).second)
-      continue;
-    Cost += CostCtx.getLegacyCost(CondI, VF);
-    for (Value *Op : CondI->operands()) {
-      auto *OpI = dyn_cast<Instruction>(Op);
-      if (!OpI || any_of(OpI->users(), [&ExitInstrs](User *U) {
-            return !ExitInstrs.contains(cast<Instruction>(U));
-          }))
-        continue;
-      ExitInstrs.insert(OpI);
-    }
-  }
-
-  // The legacy cost model has special logic to compute the cost of in-loop
-  // reductions, which may be smaller than the sum of all instructions involved
-  // in the reduction. For AnyOf reductions, VPlan codegen may remove the select
-  // which the legacy cost model uses to assign cost. Pre-compute their costs
-  // for now.
-  // TODO: Switch to costing based on VPlan once the logic has been ported.
-  for (const auto &[RedPhi, RdxDesc] : Legal->getReductionVars()) {
-    if (!CM.isInLoopReduction(RedPhi) &&
-        !RecurrenceDescriptor::isAnyOfRecurrenceKind(
-            RdxDesc.getRecurrenceKind()))
-      continue;
-
-    // AnyOf reduction codegen may remove the select. To match the legacy cost
-    // model, pre-compute the cost for AnyOf reductions here.
-    if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
-            RdxDesc.getRecurrenceKind())) {
-      auto *Select = cast<SelectInst>(*find_if(
-          RedPhi->users(), [](User *U) { return isa<SelectInst>(U); }));
-      assert(!CostCtx.SkipCostComputation.contains(Select) &&
-             "reduction op visited multiple times");
-      CostCtx.SkipCostComputation.insert(Select);
-      auto ReductionCost = CostCtx.getLegacyCost(Select, VF);
-      LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
-                        << ":\n any-of reduction " << *Select << "\n");
-      Cost += ReductionCost;
-      continue;
-    }
-
-    const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop);
-    SetVector<Instruction *> ChainOpsAndOperands(ChainOps.begin(),
-                                                 ChainOps.end());
-    // Also include the operands of instructions in the chain, as the cost-model
-    // may mark extends as free.
-    for (auto *ChainOp : ChainOps) {
-      for (Value *Op : ChainOp->operands()) {
-        if (auto *I = dyn_cast<Instruction>(Op))
-          ChainOpsAndOperands.insert(I);
-      }
-    }
-
-    // Pre-compute the cost for I, if it has a reduction pattern cost.
-    for (Instruction *I : ChainOpsAndOperands) {
-      auto ReductionCost = CM.getReductionPatternCost(
-          I, VF, ToVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput);
-      if (!ReductionCost)
-        continue;
-
-      assert(!CostCtx.SkipCostComputation.contains(I) &&
-             "reduction op visited multiple times");
-      CostCtx.SkipCostComputation.insert(I);
-      LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF
-                        << ":\n in-loop reduction " << *I << "\n");
-      Cost += *ReductionCost;
-    }
-  }
-
-  // Now compute and add the VPlan-based cost.
-  Cost += Plan.cost(VF, CostCtx);
-  LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost << "\n");
-  return Cost;
-}
-
-VPlan &LoopVectorizationPlanner::getBestPlan() const {
-  // If there is a single VPlan with a single VF, return it directly.
-  VPlan &FirstPlan = *VPlans[0];
-  if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
-    return FirstPlan;
-
-  VPlan *BestPlan = &FirstPlan;
-  ElementCount ScalarVF = ElementCount::getFixed(1);
-  assert(hasPlanWithVF(ScalarVF) &&
-         "More than a single plan/VF w/o any plan having scalar VF");
-
-  InstructionCost ScalarCost = cost(getBestPlanFor(ScalarVF), ScalarVF);
-  VectorizationFactor BestFactor(ScalarVF, ScalarCost, ScalarCost);
-
-  bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
-  if (ForceVectorization) {
-    // Ignore scalar width, because the user explicitly wants vectorization.
-    // Initialize cost to max so that VF = 2 is, at least, chosen during cost
-    // evaluation.
-    BestFactor.Cost = InstructionCost::getMax();
-  }
-
-  for (auto &P : VPlans) {
-    for (ElementCount VF : P->vectorFactors()) {
-      if (VF.isScalar())
-        continue;
-      InstructionCost Cost = cost(*P, VF);
-      VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
-      if (isMoreProfitable(CurrentFactor, BestFactor)) {
-        BestFactor = CurrentFactor;
-        BestPlan = &*P;
-      }
-    }
-  }
-  BestPlan->setVF(BestFactor.Width);
-  return *BestPlan;
-}
-
 VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
   assert(count_if(VPlans,
                   [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
@@ -10344,15 +10166,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
                                VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
                                PSI, Checks);
 
-        VPlan &BestPlan = LVP.getBestPlan();
-        assert(size(BestPlan.vectorFactors()) == 1 &&
-               "Plan should have a single VF");
-        ElementCount Width = *BestPlan.vectorFactors().begin();
-        LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
-                          << "\n");
-        assert(VF.Width == Width &&
-               "VPlan cost model and legacy cost model disagreed");
-        LVP.executePlan(Width, IC, BestPlan, LB, DT, false);
+        VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
+        LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
         ++LoopsVectorized;
 
         // Add metadata to disable runtime unrolling a scalar loop when there
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index fe6de4891a2136..5070950f87c275 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -752,72 +752,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
   State->Instance.reset();
 }
 
-InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
-  InstructionCost Cost = 0;
-  for (VPRecipeBase &R : Recipes)
-    Cost += R.cost(VF, Ctx);
-  return Cost;
-}
-
-InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
-  if (!isReplicator()) {
-    InstructionCost Cost = 0;
-    for (VPBlockBase *Block : vp_depth_first_shallow(getEntry()))
-      Cost += Block->cost(VF, Ctx);
-    InstructionCost BackedgeCost =
-        Ctx.TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
-    LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF
-                      << ": vector loop backedge\n");
-    Cost += BackedgeCost;
-    return Cost;
-  }
-
-  // Compute the cost of a replicate region. Replicating isn't supported for
-  // scalable vectors, return an invalid cost for them.
-  // TODO: Discard scalable VPlans with replicate recipes earlier after
-  // construction.
-  if (VF.isScalable())
-    return InstructionCost::getInvalid();
-
-  // First compute the cost of the conditionally executed recipes, followed by
-  // account for the branching cost, except if the mask is a header mask or
-  // uniform condition.
-  using namespace llvm::VPlanPatternMatch;
-  VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]);
-  InstructionCost ThenCost = Then->cost(VF, Ctx);
-
-  // Note the cost estimates below closely match the current legacy cost model.
-  auto *BOM = cast<VPBranchOnMaskRecipe>(&getEntryBasicBlock()->front());
-  VPValue *Cond = BOM->getOperand(0);
-
-  // Check if Cond is a header mask and don't account for branching costs as the
-  // header mask will always be true except in the last iteration.
-  if (vputils::isHeaderMask(Cond, *getPlan()))
-    return ThenCost;
-
-  // For the scalar case, we may not always execute the original predicated
-  // block, Thus, scale the block's cost by the probability of executing it.
-  if (VF.isScalar())
-    return ThenCost / getReciprocalPredBlockProb();
-
-  // Check if Cond is a uniform compare and don't account for branching costs as
-  // a uniform condition corresponds to a single branch per VF.
-  if (vputils::isUniformBoolean(Cond))
-    return ThenCost;
-
-  // Add the cost for branches around scalarized and predicated blocks.
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
-  auto *Vec_i1Ty = VectorType::get(IntegerType::getInt1Ty(Ctx.LLVMCtx), VF);
-  auto FixedVF = VF.getFixedValue(); // Known to be non scalable.
-  InstructionCost Cost = ThenCost;
-  Cost += Ctx.TTI.getScalarizationOverhead(Vec_i1Ty, APInt::getAllOnes(FixedVF),
-                                           /*Insert*/ false, /*Extract*/ true,
-                                           CostKind);
-  Cost += Ctx.TTI.getCFInstrCost(Instruction::Br, CostKind) * FixedVF;
-  return Cost;
-}
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
                           VPSlotTracker &SlotTracker) const {
@@ -1007,12 +941,6 @@ void VPlan::execute(VPTransformState *State) {
          "DT not preserved correctly");
 }
 
-InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
-  // For now only return the cost of the vector loop region, ignoring any other
-  // blocks, like the preheader or middle blocks.
-  return getVectorLoopRegion()->cost(VF, Ctx);
-}
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPlan::printLiveIns(raw_ostream &O) const {
   VPSlotTracker SlotTracker(this);
@@ -1555,8 +1483,7 @@ bool vputils::isHeaderMask(VPValue *V, VPlan &Plan) {
   auto IsWideCanonicalIV = [](VPValue *A) {
     return isa<VPWidenCanonicalIVRecipe>(A) ||
            (isa<VPWidenIntOrFpInductionRecipe>(A) &&
-            cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical()) ||
-           match(A, m_ScalarIVSteps(m_CanonicalIV(), m_SpecificInt(1)));
+            cast<VPWidenIntOrFpInductionRecipe>(A)->isCanonical());
   };
 
   VPValue *A, *B;
@@ -1568,17 +1495,3 @@ bool vputils::isHeaderMask(VPValue *V, VPlan &Plan) {
   return match(V, m_Binary<Instruction::ICmp>(m_VPValue(A), m_VPValue(B))) &&
          IsWideCanonicalIV(A) && B == Plan.getOrCreateBackedgeTakenCount();
 }
-
-bool vputils::isUniformBoolean(VPValue *Cond) {
-  if (match(Cond, m_Not(m_VPValue())))
-    Cond = Cond->getDefiningRecipe()->getOperand(0);
-  auto *R = Cond->getDefiningRecipe();
-  if (!R)
-    return true;
-  // TODO: match additional patterns preserving uniformity of booleans, e.g.,
-  // AND/OR/etc.
-  return match(R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())) &&
-         all_of(R->operands(), [](VPValue *Op) {
-           return vputils::isUniformAfterVectorization(Op);
-         });
-}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6a51023aed5016..fc25ed907b1530 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -42,7 +42,6 @@
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/FMF.h"
 #include "llvm/IR/Operator.h"
-#include "llvm/Support/InstructionCost.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -65,11 +64,8 @@ class VPlan;
 class VPReplicateRecipe;
 class VPlanSlp;
 class Value;
-class LoopVectorizationCostModel;
 class LoopVersioning;
 
-struct VPCostContext;
-
 namespace Intrinsic {
 typedef unsigned ID;
 }
@@ -86,14 +82,6 @@ Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
 const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
                                 Loop *CurLoop = nullptr);
 
-/// A helper function that returns the reciprocal of the block probability of
-/// predicated blocks. If we return X, we are assuming the predicated block
-/// will execute once for every X iterations of the loop header.
-///
-/// TODO: We should use actual block probability here, if available. Currently,
-/// we always assume predicated blocks have a 50% chance of executing.
-inline unsigned getReciprocalPredBlockProb() { return 2; }
-
 /// A range of powers-of-2 vectorization factors with fixed start and
 /// adjustable end. The range includes start and excludes end, e.g.,:
 /// [1, 16) = {1, 2, 4, 8}
@@ -636,9 +624,6 @@ class VPBlockBase {
   /// VPBlockBase, thereby "executing" the VPlan.
   virtual void execute(VPTransformState *State) = 0;
 
-  /// Return the cost of the block.
-  virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0;
-
   /// Delete all blocks reachable from a given VPBlockBase, inclusive.
   static void deleteCFG(VPBlockBase *Entry);
 
@@ -722,27 +707,6 @@ class VPLiveOut : public VPUser {
 #endif
 };
 
-/// Struct to hold various analysis needed for cost computations.
-struct VPCostContext {
-  const TargetTransformInfo &TTI;
-  VPTypeAnalysis Types;
-  LLVMContext &LLVMCtx;
-  LoopVectorizationCostModel &CM;
-  SmallPtrSet<Instruction *, 8> SkipCostComputation;
-
-  VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy,
-                LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
-      : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
-
-  /// Return the cost for \p UI with \p VF using the legacy cost model as
-  /// fallback until computing the cost of all recipes migrates to VPlan.
-  InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;
-
-  /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
-  /// has already been pre-computed.
-  bool skipCostComputation(Instruction *UI, bool IsVector) const;
-};
-
 /// VPRecipeBase is a base class modeling a sequence of one or more output IR
 /// instructions. VPRecipeBase owns the VPValues it defines through VPDef
 /// and is responsible for deleting its defined values. Single-value
@@ -782,11 +746,6 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
   /// this VPRecipe, thereby "executing" the VPlan.
   virtual void execute(VPTransformState &State) = 0;
 
-  /// Return the cost of this recipe, taking into account if the cost
-  /// computation should be skipped and the ForceTargetInstructionCost flag.
-  /// Also takes care of printing the cost for debugging.
-  virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
-
   /// Insert an unlinked recipe into a basic block immediately before
   /// the specified recipe.
   void insertBefore(VPRecipeBase *InsertPos);
@@ -847,11 +806,6 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
 
   /// Returns the debug location of the recipe.
   DebugLoc getDebugLoc() const { return DL; }
-
-protected:
-  /// Compute the cost of this recipe using the legacy cost model and the
-  /// underlying instructions.
-  InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
 };
 
 // Helper macro to define common classof implementations for recipes.
@@ -1427,6 +1381,8 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
         ResultTy(ResultTy) {
     assert(UI.getOpcode() == Opcode &&
            "opcode of underlying cast doesn't match");
+    assert(UI.getType() == ResultTy &&
+           "result type of underlying cast doesn't match");
   }
 
   VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
@@ -2140,8 +2096,6 @@ class VPInterleaveRecipe : public VPRecipeBase {
            "Op must be an operand of the recipe");
     return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
   }
-
-  Instruction *getInsertPos() const { return IG->getInsertPos(); }
 };
 
 /// A recipe to represent inloop reduction operations, performing a reduction on
@@ -2956,9 +2910,6 @@ class VPBasicBlock : public VPBlockBase {
   /// this VPBasicBlock, thereby "executing" the VPlan.
   void execute(VPTransformState *State) override;
 
-  /// Return the cost of this VPBasicBlock.
-  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
-
   /// Return the position of the first non-phi node recipe in the block.
   iterator getFirstNonPhi();
 
@@ -3133,9 +3084,6 @@ class VPRegionBlock : public VPBlockBase {
   /// this VPRegionBlock, thereby "executing" the VPlan.
   void execute(VPTransformState *State) override;
 
-  // Return the cost of this region.
-  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
-
   void dropAllReferences(VPValue *NewValue) override;
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3255,9 +3203,6 @@ class VPlan {
   /// Generate the IR code for this VPlan.
   void execute(VPTransformState *State);
 
-  /// Return the cost of this plan.
-  InstructionCost cost(ElementCount VF, VPCostContext &Ctx);
-
   VPBasicBlock *getEntry() { return Entry; }
   const VPBasicBlock *getEntry() const { return Entry; }
 
@@ -3301,11 +3246,6 @@ class VPlan {
     return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
   }
 
-  iterator_range<SmallSetVector<ElementCount, 2>::iterator>
-  vectorFactors() const {
-    return {VFs.begin(), VFs.end()};
-  }
-
   bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
 
   bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
@@ -3729,9 +3669,6 @@ inline bool isUniformAfterVectorization(VPValue *VPV) {
 /// Return true if \p V is a header mask in \p Plan.
 bool isHeaderMask(VPValue *V, VPlan &Plan);
 
-/// Return true if \p Cond is a uniform boolean.
-bool isUniformBoolean(VPValue *Cond);
-
 } // end namespace vputils
 } // end namespace llvm
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 972d895f49d9ff..a3ff6395bb39ed 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -39,7 +39,6 @@ using VectorParts = SmallVector<Value *, 2>;
 namespace llvm {
 extern cl::opt<bool> EnableVPlanNativePath;
 }
-extern cl::opt<unsigned> ForceTargetInstructionCost;
 
 #define LV_NAME "loop-vectorize"
 #define DEBUG_TYPE LV_NAME
@@ -256,40 +255,6 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
   insertBefore(BB, I);
 }
 
-InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
-  if (auto *S = dyn_cast<VPSingleDefRecipe>(this)) {
-    auto *UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
-    if (UI && Ctx.skipCostComputation(UI, VF.isVector()))
-      return 0;
-  }
-
-  InstructionCost RecipeCost = computeCost(VF, Ctx);
-  if (ForceTargetInstructionCost.getNumOccurrences() > 0 &&
-      RecipeCost.isValid())
-    RecipeCost = InstructionCost(ForceTargetInstructionCost);
-
-  LLVM_DEBUG({
-    dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
-    dump();
-  });
-  return RecipeCost;
-}
-
-InstructionCost VPRecipeBase::computeCost(ElementCount VF,
-                                          VPCostContext &Ctx) const {
-  // Compute the cost for the recipe falling back to the legacy cost model using
-  // the underlying instruction. If there is no underlying instruction, returns
-  // 0.
-  Instruction *UI = nullptr;
-  if (auto *S = dyn_cast<VPSingleDefRecipe>(this))
-    UI = dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
-  else if (auto *IG = dyn_cast<VPInterleaveRecipe>(this))
-    UI = IG->getInsertPos();
-  else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
-    UI = &WidenMem->getIngredient();
-  return UI ? Ctx.getLegacyCost(UI, VF) : 0;
-}
-
 FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
   assert(OpType == OperationType::FPMathOp &&
          "recipe doesn't have fast math flags");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d979ca331cb5ff..e2b7b0c7a219df 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -999,10 +999,6 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
                                  : Instruction::ZExt;
         auto *VPC =
             new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
-        if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
-          // UnderlyingExt has distinct return type, used to retain legacy cost.
-          VPC->setUnderlyingValue(UnderlyingExt);
-        }
         VPC->insertBefore(&R);
         Trunc->replaceAllUsesWith(VPC);
       } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
@@ -1519,7 +1515,6 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
           VPInstruction *New = Builder.createOverflowingOp(
               Instruction::Add, {A, B}, {false, false},
               RecWithFlags->getDebugLoc());
-          New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
           RecWithFlags->replaceAllUsesWith(New);
           RecWithFlags->eraseFromParent();
           CurRec = New;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index fa6a65ff2f3ada..8d945f6f2b8ea8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -74,7 +74,8 @@ class VPValue {
 
 public:
   /// Return the underlying Value attached to this VPValue.
-  Value *getUnderlyingValue() const { return UnderlyingVal; }
+  Value *getUnderlyingValue() { return UnderlyingVal; }
+  const Value *getUnderlyingValue() const { return UnderlyingVal; }
 
   /// An enumeration for keeping track of the concrete subclass of VPValue that
   /// are actually instantiated.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 41879f3ebef5a5..b5aa96eb23f5e5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -119,7 +119,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: LV: Interleaving is not beneficial.
 ; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
 ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
-; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
 ; CHECK: LV: Interleaving disabled by the pass manager
 ; CHECK-NEXT: LV: Vectorizing: innermost loop.
@@ -261,7 +260,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT: LV: Interleaving is not beneficial.
 ; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
 ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
-; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
 ; CHECK: LV: Interleaving disabled by the pass manager
 ; CHECK-NEXT: LV: Vectorizing: innermost loop.
diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll
deleted file mode 100644
index 785af1551dd286..00000000000000
--- a/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -p loop-vectorize -S %s | FileCheck %s
-
-target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-f128:64-n32:64-S128-ni:1:10:20"
-target triple = "wasm32-unknown-emscripten"
-
-define void @induction_phi_and_branch_cost(ptr %end, ptr %start.1, ptr %start.2) #0 {
-; CHECK-LABEL: define void @induction_phi_and_branch_cost(
-; CHECK-SAME: ptr [[END:%.*]], ptr [[START_1:%.*]], ptr [[START_2:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i32
-; CHECK-NEXT: [[START_11:%.*]] = ptrtoint ptr [[START_1]] to i32
-; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[START_11]], [[END2]]
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[N_VEC]], -4
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1]], i32 [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[N_VEC]], -4
-; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[START_2]], i32 [[TMP4]]
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], -4
-; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i32 [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 -3
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
-; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START_1]], %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], %[[MIDDLE_BLOCK]] ], [ [[START_2]], %[[ENTRY]] ]
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i32 -4
-; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr i8, ptr [[PTR_IV_2]], i32 -4
-; CHECK-NEXT: store i32 0, ptr [[PTR_IV_2]], align 4
-; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: ret void
-;
-entry:
-  br label %loop
-
-loop:
-  %ptr.iv = phi ptr [ %start.1, %entry ], [ %ptr.iv.next, %loop ]
-  %ptr.iv.2 = phi ptr [ %start.2, %entry ], [ %ptr.iv.2.next, %loop ]
-  %ptr.iv.next = getelementptr nusw i8, ptr %ptr.iv, i32 -4
-  %ptr.iv.2.next = getelementptr i8, ptr %ptr.iv.2, i32 -4
-  store i32 0, ptr %ptr.iv.2, align 4
-  %ec = icmp eq ptr %ptr.iv, %end
-  br i1 %ec, label %exit, label %loop
-
-exit:
-  ret void
-}
-
-attributes #0 = { "target-features"="+simd128" }
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-;.
diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/WebAssembly/lit.local.cfg
deleted file mode 100644
index d5f39ab4dbc8ca..00000000000000
--- a/llvm/test/Transforms/LoopVectorize/WebAssembly/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not "WebAssembly" in config.root.targets:
-    config.unsupported = True