Skip to content

Commit

Permalink
[VPlan] Replace getBestPlan by getBestVF, use also for epilogue vec. (llvm#98821)
Browse files Browse the repository at this point in the history

Replace getBestPlan by getBestVF which simply finds the best
VF out of the VFs for the available VPlans.

Then use getBestPlanFor to retrieve the VPlan corresponding to that VF.

This allows using getBestVF & getBestPlanFor for epilogue vectorization
as well. As the same plan may be used to vectorize both the main
and epilogue loop (with different VFs), restricting the best plan to a
single VF, as getBestPlan did via setVF, would cause issues.

PR: llvm#98821
  • Loading branch information
fhahn authored Jul 26, 2024
1 parent 9d22095 commit 67a55e0
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 29 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -365,8 +365,8 @@ class LoopVectorizationPlanner {
/// Return the best VPlan for \p VF.
VPlan &getBestPlanFor(ElementCount VF) const;

/// Return the most profitable plan and fix its VF to the most profitable one.
VPlan &getBestPlan() const;
/// Return the most profitable vectorization factor.
ElementCount getBestVF() const;

/// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
/// according to the best selected \p VF and \p UF.
Expand Down
43 changes: 18 additions & 25 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7162,13 +7162,12 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
return Cost;
}

VPlan &LoopVectorizationPlanner::getBestPlan() const {
ElementCount LoopVectorizationPlanner::getBestVF() const {
// If there is a single VPlan with a single VF, return it directly.
VPlan &FirstPlan = *VPlans[0];
if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1)
return FirstPlan;
return *FirstPlan.vectorFactors().begin();

VPlan *BestPlan = &FirstPlan;
ElementCount ScalarVF = ElementCount::getFixed(1);
assert(hasPlanWithVF(ScalarVF) &&
"More than a single plan/VF w/o any plan having scalar VF");
Expand Down Expand Up @@ -7199,14 +7198,11 @@ VPlan &LoopVectorizationPlanner::getBestPlan() const {

InstructionCost Cost = cost(*P, VF);
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
if (isMoreProfitable(CurrentFactor, BestFactor)) {
if (isMoreProfitable(CurrentFactor, BestFactor))
BestFactor = CurrentFactor;
BestPlan = &*P;
}
}
}
BestPlan->setVF(BestFactor.Width);
return *BestPlan;
return BestFactor.Width;
}

VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
Expand Down Expand Up @@ -10001,10 +9997,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
&CM, BFI, PSI, Checks);

VPlan &BestPlan = LVP.getBestPlan();
assert(BestPlan.hasScalarVFOnly() &&
ElementCount BestVF = LVP.getBestVF();
assert(BestVF.isScalar() &&
"VPlan cost model and legacy cost model disagreed");
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
VPlan &BestPlan = LVP.getBestPlanFor(BestVF);
LVP.executePlan(BestVF, IC, BestPlan, Unroller, DT, false);

ORE->emit([&]() {
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
Expand All @@ -10015,21 +10012,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
} else {
// If we decided that it is *legal* to vectorize the loop, then do it.

ElementCount BestVF = LVP.getBestVF();
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << BestVF << "\n");
assert(VF.Width == BestVF &&
"VPlan cost model and legacy cost model disagreed");
VPlan &BestPlan = LVP.getBestPlanFor(BestVF);
// Consider vectorizing the epilogue too if it's profitable.
VectorizationFactor EpilogueVF =
LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
LVP.selectEpilogueVectorizationFactor(BestVF, IC);
if (EpilogueVF.Width.isVector()) {

// The first pass vectorizes the main loop and creates a scalar epilogue
// to be vectorized by executing the plan (potentially with a different
// factor) again shortly afterwards.
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
EpilogueLoopVectorizationInfo EPI(BestVF, IC, EpilogueVF.Width, 1);
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI, &LVL, &CM, BFI, PSI, Checks);

assert(EPI.MainLoopVF == VF.Width && "VFs must match");
std::unique_ptr<VPlan> BestMainPlan(
LVP.getBestPlanFor(VF.Width).duplicate());
std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
const auto &[ExpandedSCEVs, ReductionResumeValues] = LVP.executePlan(
EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, true);
++LoopsVectorized;
Expand Down Expand Up @@ -10120,18 +10121,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (!MainILV.areSafetyChecksAdded())
DisableRuntimeUnroll = true;
} else {
VPlan &BestPlan = LVP.getBestPlan();
assert(size(BestPlan.vectorFactors()) == 1 &&
"Plan should have a single VF");
ElementCount Width = *BestPlan.vectorFactors().begin();
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width
<< "\n");
assert(VF.Width == Width &&
"VPlan cost model and legacy cost model disagreed");
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, Width,
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, BestVF,
VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
PSI, Checks);
LVP.executePlan(Width, IC, BestPlan, LB, DT, false);
LVP.executePlan(BestVF, IC, BestPlan, LB, DT, false);
++LoopsVectorized;

// Add metadata to disable runtime unrolling a scalar loop when there
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Not Interleaving.
; CHECK-NEXT: LV: Interleaving is not beneficial.
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
; CHECK-NEXT: Live-in vp<%0> = VF * UF
Expand Down Expand Up @@ -340,8 +340,8 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Not Interleaving.
; CHECK-NEXT: LV: Interleaving is not beneficial.
; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in <stdin>
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF>=1' {
; CHECK-NEXT: Live-in vp<%0> = VF * UF
Expand Down

0 comments on commit 67a55e0

Please sign in to comment.