-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Port invalid cost remarks to VPlan. #99322
Changes from 3 commits
3a56779
65cb0cd
31ed354
d5bcc97
6f1b14d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -75,6 +75,7 @@ | |||||||||||||||||||||
#include "llvm/ADT/Statistic.h" | ||||||||||||||||||||||
#include "llvm/ADT/StringRef.h" | ||||||||||||||||||||||
#include "llvm/ADT/Twine.h" | ||||||||||||||||||||||
#include "llvm/ADT/TypeSwitch.h" | ||||||||||||||||||||||
#include "llvm/ADT/iterator_range.h" | ||||||||||||||||||||||
#include "llvm/Analysis/AssumptionCache.h" | ||||||||||||||||||||||
#include "llvm/Analysis/BasicAliasAnalysis.h" | ||||||||||||||||||||||
|
@@ -891,10 +892,12 @@ static void debugVectorizationMessage(const StringRef Prefix, | |||||||||||||||||||||
/// instruction that prevents vectorization. Otherwise \p TheLoop is used for | ||||||||||||||||||||||
/// the location of the remark. \return the remark object that can be | ||||||||||||||||||||||
/// streamed to. | ||||||||||||||||||||||
static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName, | ||||||||||||||||||||||
StringRef RemarkName, Loop *TheLoop, Instruction *I) { | ||||||||||||||||||||||
static OptimizationRemarkAnalysis | ||||||||||||||||||||||
createLVAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, | ||||||||||||||||||||||
Instruction *I, DebugLoc DL = {}) { | ||||||||||||||||||||||
Value *CodeRegion = TheLoop->getHeader(); | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps slightly clarify the precedence. If DL is provided, should it have highest precedence?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adjusted, thanks! |
||||||||||||||||||||||
DebugLoc DL = TheLoop->getStartLoc(); | ||||||||||||||||||||||
if (!DL) | ||||||||||||||||||||||
DL = TheLoop->getStartLoc(); | ||||||||||||||||||||||
|
||||||||||||||||||||||
if (I) { | ||||||||||||||||||||||
CodeRegion = I->getParent(); | ||||||||||||||||||||||
|
@@ -945,13 +948,14 @@ void reportVectorizationFailure(const StringRef DebugMsg, | |||||||||||||||||||||
/// as an optimization remark. Uses either \p I as location of the remark, or | ||||||||||||||||||||||
/// otherwise \p TheLoop. | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about \p DL? |
||||||||||||||||||||||
static void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, | ||||||||||||||||||||||
OptimizationRemarkEmitter *ORE, Loop *TheLoop, | ||||||||||||||||||||||
Instruction *I = nullptr) { | ||||||||||||||||||||||
OptimizationRemarkEmitter *ORE, | ||||||||||||||||||||||
Loop *TheLoop, Instruction *I = nullptr, | ||||||||||||||||||||||
DebugLoc DL = {}) { | ||||||||||||||||||||||
LLVM_DEBUG(debugVectorizationMessage("", Msg, I)); | ||||||||||||||||||||||
LoopVectorizeHints Hints(TheLoop, true /* doesn't matter */, *ORE); | ||||||||||||||||||||||
ORE->emit( | ||||||||||||||||||||||
createLVAnalysis(Hints.vectorizeAnalysisPassName(), ORETag, TheLoop, I) | ||||||||||||||||||||||
<< Msg); | ||||||||||||||||||||||
ORE->emit(createLVAnalysis(Hints.vectorizeAnalysisPassName(), ORETag, TheLoop, | ||||||||||||||||||||||
I, DL) | ||||||||||||||||||||||
<< Msg); | ||||||||||||||||||||||
} | ||||||||||||||||||||||
|
||||||||||||||||||||||
/// Report successful vectorization of the loop. In case an outer loop is | ||||||||||||||||||||||
|
@@ -1541,9 +1545,7 @@ class LoopVectorizationCostModel { | |||||||||||||||||||||
/// the factor width. If \p Invalid is not nullptr, this function | ||||||||||||||||||||||
/// will add a pair(Instruction*, ElementCount) to \p Invalid for | ||||||||||||||||||||||
/// each instruction that has an Invalid cost for the given VF. | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Drop last sentence about \p Invalid? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed thanks! |
||||||||||||||||||||||
InstructionCost | ||||||||||||||||||||||
expectedCost(ElementCount VF, | ||||||||||||||||||||||
SmallVectorImpl<InstructionVFPair> *Invalid = nullptr); | ||||||||||||||||||||||
InstructionCost expectedCost(ElementCount VF); | ||||||||||||||||||||||
|
||||||||||||||||||||||
bool hasPredStores() const { return NumPredStores > 0; } | ||||||||||||||||||||||
|
||||||||||||||||||||||
|
@@ -4343,24 +4345,43 @@ bool LoopVectorizationPlanner::isMoreProfitable( | |||||||||||||||||||||
return CmpFn(RTCostA, RTCostB); | ||||||||||||||||||||||
} | ||||||||||||||||||||||
|
||||||||||||||||||||||
static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts, | ||||||||||||||||||||||
OptimizationRemarkEmitter *ORE, | ||||||||||||||||||||||
Loop *TheLoop) { | ||||||||||||||||||||||
void LoopVectorizationPlanner::emitInvalidCostRemarks( | ||||||||||||||||||||||
OptimizationRemarkEmitter *ORE) { | ||||||||||||||||||||||
if (VPlans.empty()) | ||||||||||||||||||||||
return; | ||||||||||||||||||||||
|
||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Can be dropped. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dropped, thanks! |
||||||||||||||||||||||
using RecipeVFPair = std::pair<VPRecipeBase *, ElementCount>; | ||||||||||||||||||||||
SmallVector<RecipeVFPair> InvalidCosts; | ||||||||||||||||||||||
for (const auto &Plan : VPlans) { | ||||||||||||||||||||||
for (ElementCount VF : Plan->vectorFactors()) { | ||||||||||||||||||||||
LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext(); | ||||||||||||||||||||||
VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, | ||||||||||||||||||||||
CM); | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can be defined at the outset? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately not at the moment, as we mark underlying instructions for VPReplicateRecipes to only compute the cost once, which means we would skip them in some cases
Comment on lines
+4357
to
+4358
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CostCtx is kept here rather than hoisting it alongside LLVMCtx above, due to its caching of SkipCostComputation. But the latter is initialized by calling LVP::cost(Plan, VF), whereas here all recipes are asked for their cost directly, w/o going through LVP::cost(). Should LVP::cost() be called first, and iff it returns invalid traverse the recipes? Or note that invalid costs cannot be skipped(?), so calling LVP::cost() is redundant when only invalid costs are sought, in which case CostCtx can be hoisted(?) |
||||||||||||||||||||||
auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry()); | ||||||||||||||||||||||
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) { | ||||||||||||||||||||||
for (auto &R : *VPBB) { | ||||||||||||||||||||||
if (R.cost(VF, CostCtx).isValid()) | ||||||||||||||||||||||
continue; | ||||||||||||||||||||||
InvalidCosts.emplace_back(&R, VF); | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done thanks! |
||||||||||||||||||||||
} | ||||||||||||||||||||||
} | ||||||||||||||||||||||
} | ||||||||||||||||||||||
} | ||||||||||||||||||||||
if (InvalidCosts.empty()) | ||||||||||||||||||||||
return; | ||||||||||||||||||||||
|
||||||||||||||||||||||
// Emit a report of VFs with invalid costs in the loop. | ||||||||||||||||||||||
|
||||||||||||||||||||||
// Group the remarks per instruction, keeping the instruction order from | ||||||||||||||||||||||
// InvalidCosts. | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||||||||||||||||||||||
std::map<Instruction *, unsigned> Numbering; | ||||||||||||||||||||||
DenseMap<VPRecipeBase *, unsigned> Numbering; | ||||||||||||||||||||||
unsigned I = 0; | ||||||||||||||||||||||
for (auto &Pair : InvalidCosts) | ||||||||||||||||||||||
if (!Numbering.count(Pair.first)) | ||||||||||||||||||||||
Numbering[Pair.first] = I++; | ||||||||||||||||||||||
|
||||||||||||||||||||||
// Sort the list, first on instruction(number) then on VF. | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||||||||||||||||||||||
sort(InvalidCosts, [&Numbering](InstructionVFPair &A, InstructionVFPair &B) { | ||||||||||||||||||||||
sort(InvalidCosts, [&Numbering](RecipeVFPair &A, RecipeVFPair &B) { | ||||||||||||||||||||||
if (Numbering[A.first] != Numbering[B.first]) | ||||||||||||||||||||||
return Numbering[A.first] < Numbering[B.first]; | ||||||||||||||||||||||
const auto &LHS = A.second; | ||||||||||||||||||||||
|
@@ -4374,33 +4395,57 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts, | |||||||||||||||||||||
// Group the instructions together to emit separate remarks for: | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
say something about Opcodes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done thanks! |
||||||||||||||||||||||
// load (vf1, vf2) | ||||||||||||||||||||||
// store (vf1) | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||||||||||||||||||||||
auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts); | ||||||||||||||||||||||
auto Subset = ArrayRef<InstructionVFPair>(); | ||||||||||||||||||||||
auto Tail = ArrayRef<RecipeVFPair>(InvalidCosts); | ||||||||||||||||||||||
auto Subset = ArrayRef<RecipeVFPair>(); | ||||||||||||||||||||||
do { | ||||||||||||||||||||||
if (Subset.empty()) | ||||||||||||||||||||||
Subset = Tail.take_front(1); | ||||||||||||||||||||||
|
||||||||||||||||||||||
Instruction *I = Subset.front().first; | ||||||||||||||||||||||
VPRecipeBase *R = Subset.front().first; | ||||||||||||||||||||||
|
||||||||||||||||||||||
unsigned Opcode = | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This retains current dumps, a worthy (temporary) objective, but deserves further attention - recipes (including those with invalid cost) should arguably print themselves, as in R.print(), perhaps supporting a shorter printing of their "opcode" only? |
||||||||||||||||||||||
TypeSwitch<const VPRecipeBase *, unsigned>(R) | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are all cases handled, default deemed unreachable? |
||||||||||||||||||||||
.Case<VPHeaderPHIRecipe>( | ||||||||||||||||||||||
[](const auto *R) { return Instruction::PHI; }) | ||||||||||||||||||||||
.Case<VPWidenSelectRecipe>( | ||||||||||||||||||||||
[](const auto *R) { return Instruction::Select; }) | ||||||||||||||||||||||
.Case<VPWidenStoreRecipe>( | ||||||||||||||||||||||
[](const auto *R) { return Instruction::Store; }) | ||||||||||||||||||||||
.Case<VPWidenLoadRecipe>( | ||||||||||||||||||||||
[](const auto *R) { return Instruction::Load; }) | ||||||||||||||||||||||
.Case<VPWidenCallRecipe>( | ||||||||||||||||||||||
[](const auto *R) { return Instruction::Call; }) | ||||||||||||||||||||||
.Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe, | ||||||||||||||||||||||
VPWidenCastRecipe>( | ||||||||||||||||||||||
[](const auto *R) { return R->getOpcode(); }) | ||||||||||||||||||||||
.Case<VPInterleaveRecipe>([](const VPInterleaveRecipe *R) { | ||||||||||||||||||||||
return R->getStoredValues().empty() ? Instruction::Load | ||||||||||||||||||||||
: Instruction::Store; | ||||||||||||||||||||||
}); | ||||||||||||||||||||||
|
||||||||||||||||||||||
// If the next instruction is different, or if there are no other pairs, | ||||||||||||||||||||||
// emit a remark for the collated subset. e.g. | ||||||||||||||||||||||
// [(load, vf1), (load, vf2)] | ||||||||||||||||||||||
// to emit: | ||||||||||||||||||||||
// remark: invalid costs for 'load' at VF=(vf1, vf2) | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||||||||||||||||||||||
if (Subset == Tail || Tail[Subset.size()].first != I) { | ||||||||||||||||||||||
if (Subset == Tail || Tail[Subset.size()].first != R) { | ||||||||||||||||||||||
std::string OutString; | ||||||||||||||||||||||
raw_string_ostream OS(OutString); | ||||||||||||||||||||||
assert(!Subset.empty() && "Unexpected empty range"); | ||||||||||||||||||||||
OS << "Instruction with invalid costs prevented vectorization at VF=("; | ||||||||||||||||||||||
OS << "Recipe with invalid costs prevented vectorization at VF=("; | ||||||||||||||||||||||
for (const auto &Pair : Subset) | ||||||||||||||||||||||
OS << (Pair.second == Subset.front().second ? "" : ", ") << Pair.second; | ||||||||||||||||||||||
OS << "):"; | ||||||||||||||||||||||
if (auto *CI = dyn_cast<CallInst>(I)) | ||||||||||||||||||||||
OS << " call to " << CI->getCalledFunction()->getName(); | ||||||||||||||||||||||
if (Opcode == Instruction::Call) | ||||||||||||||||||||||
OS << " call to " | ||||||||||||||||||||||
<< R->getOperand(R->getNumOperands() - 1) | ||||||||||||||||||||||
->getLiveInIRValue() | ||||||||||||||||||||||
->getName(); | ||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better have VPWidenCallRecipe and VPReplicateRecipe-of-a-Call take care of printing themselves via getCalledScalarFunction and underlying.getCalledFunction(), respectively? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||||||||||||||||||||||
else | ||||||||||||||||||||||
OS << " " << I->getOpcodeName(); | ||||||||||||||||||||||
OS << " " << Instruction::getOpcodeName(Opcode); | ||||||||||||||||||||||
OS.flush(); | ||||||||||||||||||||||
reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I); | ||||||||||||||||||||||
reportVectorizationInfo(OutString, "InvalidCost", ORE, OrigLoop, nullptr, | ||||||||||||||||||||||
R->getDebugLoc()); | ||||||||||||||||||||||
Tail = Tail.drop_front(Subset.size()); | ||||||||||||||||||||||
Subset = {}; | ||||||||||||||||||||||
} else | ||||||||||||||||||||||
|
@@ -4529,14 +4574,13 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { | |||||||||||||||||||||
ChosenFactor.Cost = InstructionCost::getMax(); | ||||||||||||||||||||||
} | ||||||||||||||||||||||
|
||||||||||||||||||||||
SmallVector<InstructionVFPair> InvalidCosts; | ||||||||||||||||||||||
for (auto &P : VPlans) { | ||||||||||||||||||||||
for (ElementCount VF : P->vectorFactors()) { | ||||||||||||||||||||||
// The cost for scalar VF=1 is already calculated, so ignore it. | ||||||||||||||||||||||
if (VF.isScalar()) | ||||||||||||||||||||||
continue; | ||||||||||||||||||||||
|
||||||||||||||||||||||
InstructionCost C = CM.expectedCost(VF, &InvalidCosts); | ||||||||||||||||||||||
InstructionCost C = CM.expectedCost(VF); | ||||||||||||||||||||||
VectorizationFactor Candidate(VF, C, ScalarCost.ScalarCost); | ||||||||||||||||||||||
|
||||||||||||||||||||||
#ifndef NDEBUG | ||||||||||||||||||||||
|
@@ -4571,8 +4615,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { | |||||||||||||||||||||
} | ||||||||||||||||||||||
} | ||||||||||||||||||||||
|
||||||||||||||||||||||
emitInvalidCostRemarks(InvalidCosts, ORE, OrigLoop); | ||||||||||||||||||||||
|
||||||||||||||||||||||
if (!EnableCondStoresVectorization && CM.hasPredStores()) { | ||||||||||||||||||||||
reportVectorizationFailure( | ||||||||||||||||||||||
"There are conditional stores.", | ||||||||||||||||||||||
|
@@ -5477,8 +5519,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount( | |||||||||||||||||||||
return Discount; | ||||||||||||||||||||||
} | ||||||||||||||||||||||
|
||||||||||||||||||||||
InstructionCost LoopVectorizationCostModel::expectedCost( | ||||||||||||||||||||||
ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) { | ||||||||||||||||||||||
InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) { | ||||||||||||||||||||||
InstructionCost Cost; | ||||||||||||||||||||||
|
||||||||||||||||||||||
// For each block. | ||||||||||||||||||||||
|
@@ -5498,10 +5539,6 @@ InstructionCost LoopVectorizationCostModel::expectedCost( | |||||||||||||||||||||
if (C.isValid() && ForceTargetInstructionCost.getNumOccurrences() > 0) | ||||||||||||||||||||||
C = InstructionCost(ForceTargetInstructionCost); | ||||||||||||||||||||||
|
||||||||||||||||||||||
// Keep a list of instructions with invalid costs. | ||||||||||||||||||||||
if (Invalid && !C.isValid()) | ||||||||||||||||||||||
Invalid->emplace_back(&I, VF); | ||||||||||||||||||||||
|
||||||||||||||||||||||
BlockCost += C; | ||||||||||||||||||||||
LLVM_DEBUG(dbgs() << "LV: Found an estimated cost of " << C << " for VF " | ||||||||||||||||||||||
<< VF << " For instruction: " << I << '\n'); | ||||||||||||||||||||||
|
@@ -9841,6 +9878,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |||||||||||||||||||||
// Plan how to best vectorize, return the best VF and its cost. | ||||||||||||||||||||||
std::optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC); | ||||||||||||||||||||||
|
||||||||||||||||||||||
if (ORE->allowExtraAnalysis(LV_NAME)) | ||||||||||||||||||||||
LVP.emitInvalidCostRemarks(ORE); | ||||||||||||||||||||||
|
||||||||||||||||||||||
VectorizationFactor VF = VectorizationFactor::Disabled(); | ||||||||||||||||||||||
unsigned IC = 1; | ||||||||||||||||||||||
|
||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add documentation for \p DL?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added, thanks!