From c4952e513f845a0e290d22ed1131d1b6438bb65b Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 27 Sep 2024 08:09:10 -0700 Subject: [PATCH] [nfc][ctx_prof] Efficient profile traversal and update (#110052) This optimizes profile updates and visits, where we want to access contexts for a specific function. These are all the current update cases. We do so by maintaining a list of contexts for each function, preserving preorder traversal. The list is updated whenever contexts are `std::move`-d or deleted. --- llvm/include/llvm/Analysis/CtxProfAnalysis.h | 6 +- .../llvm/ProfileData/PGOCtxProfReader.h | 62 ++++++++++++++++++- llvm/lib/Analysis/CtxProfAnalysis.cpp | 47 +++++++++++--- .../Transforms/Utils/CallPromotionUtils.cpp | 2 +- llvm/lib/Transforms/Utils/InlineFunction.cpp | 2 +- 5 files changed, 103 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index 0a9543f037eb58..be040d5eca5f31 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -35,7 +35,7 @@ class PGOContextualProfile { uint32_t NextCounterIndex = 0; uint32_t NextCallsiteIndex = 0; const std::string Name; - + PGOCtxProfContext Index; FunctionInfo(StringRef Name) : Name(Name) {} }; std::optional Profiles; @@ -50,6 +50,8 @@ class PGOContextualProfile { // its state piecemeal. PGOContextualProfile() = default; + void initIndex(); + public: PGOContextualProfile(const PGOContextualProfile &) = delete; PGOContextualProfile(PGOContextualProfile &&) = default; @@ -94,7 +96,7 @@ class PGOContextualProfile { using ConstVisitor = function_ref; using Visitor = function_ref; - void update(Visitor, const Function *F = nullptr); + void update(Visitor, const Function &F); void visit(ConstVisitor, const Function *F = nullptr) const; const CtxProfFlatProfile flatten() const; diff --git a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h index a00c21ddc7d7a1..d358041e3a001f 100644 --- a/llvm/include/llvm/ProfileData/PGOCtxProfReader.h +++ b/llvm/include/llvm/ProfileData/PGOCtxProfReader.h @@ -22,6 +22,58 @@ #include namespace llvm { +class PGOContextualProfile; +class PGOCtxProfContext; + +namespace internal { +// When we traverse the contextual profile, we typically want to visit contexts +// pertaining to a specific function. To avoid traversing the whole tree, we +// want to keep a per-function list - which will be in preorder - of that +// function's contexts. This happens in PGOContextualProfile. For memory use +// efficiency, we want to make PGOCtxProfContext an intrusive double-linked list +// node. We need to handle the cases where PGOCtxProfContext nodes are moved and +// deleted: in both cases, we need to update the index (==list). We can do that +// directly from the node in the list, without knowing who the "parent" of the +// list is. That makes the ADT ilist overkill here. Finally, IndexNode is meant +// to be an implementation detail of PGOCtxProfContext, and the only reason it's +// factored out is to avoid implementing move semantics for all its members. +class IndexNode { + // This class' members are intentionally private - it's a convenience + // implementation detail. + friend class ::llvm::PGOCtxProfContext; + friend class ::llvm::PGOContextualProfile; + + IndexNode *Previous = nullptr; + IndexNode *Next = nullptr; + + ~IndexNode() { + if (Next) + Next->Previous = Previous; + if (Previous) + Previous->Next = Next; + } + + IndexNode(const IndexNode &Other) = delete; + + IndexNode(IndexNode &&Other) { + // Copy the neighbor info + Next = Other.Next; + Previous = Other.Previous; + + // Update the neighbors to point to this object + if (Other.Next) + Other.Next->Previous = this; + if (Other.Previous) + Other.Previous->Next = this; + + // Make sure the dtor is a noop + Other.Next = nullptr; + Other.Previous = nullptr; + } + IndexNode() = default; +}; +} // namespace internal + /// A node (context) in the loaded contextual profile, suitable for mutation /// during IPO passes. We generally expect a fraction of counters and /// callsites to be populated. We continue to model counters as vectors, but @@ -29,13 +81,15 @@ namespace llvm { /// there is a small number of indirect targets (usually, 1 for direct calls); /// but potentially a large number of callsites, and, as inlining progresses, /// the callsite count of a caller will grow. -class PGOCtxProfContext final { +class PGOCtxProfContext final : public internal::IndexNode { public: using CallTargetMapTy = std::map; using CallsiteMapTy = std::map; private: friend class PGOCtxProfileReader; + friend class PGOContextualProfile; + GlobalValue::GUID GUID = 0; SmallVector Counters; CallsiteMapTy Callsites; @@ -47,11 +101,15 @@ class PGOCtxProfContext final { getOrEmplace(uint32_t Index, GlobalValue::GUID G, SmallVectorImpl &&Counters); + // Create a bogus context object, used for anchoring the index double linked + // list - see IndexNode + PGOCtxProfContext() = default; + public: PGOCtxProfContext(const PGOCtxProfContext &) = delete; PGOCtxProfContext &operator=(const PGOCtxProfContext &) = delete; PGOCtxProfContext(PGOCtxProfContext &&) = default; - PGOCtxProfContext &operator=(PGOCtxProfContext &&) = default; + PGOCtxProfContext &operator=(PGOCtxProfContext &&) = delete; GlobalValue::GUID guid() const { return GUID; } const SmallVectorImpl &counters() const { return Counters; } diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index e03b5dfd538194..825e517cd09f52 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -184,6 +184,7 @@ PGOContextualProfile CtxProfAnalysis::run(Module &M, // If we made it this far, the Result is valid - which we mark by setting // .Profiles. Result.Profiles = std::move(*MaybeCtx); + Result.initIndex(); return Result; } @@ -265,11 +266,9 @@ CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) { template static void preorderVisit(ProfilesTy &Profiles, - function_ref Visitor, - GlobalValue::GUID Match = 0) { + function_ref Visitor) { std::function Traverser = [&](auto &Ctx) { - if (!Match || Ctx.guid() == Match) - Visitor(Ctx); + Visitor(Ctx); for (auto &[_, SubCtxSet] : Ctx.callsites()) for (auto &[__, Subctx] : SubCtxSet) Traverser(Subctx); @@ -278,16 +277,44 @@ static void preorderVisit(ProfilesTy &Profiles, Traverser(P); } -void PGOContextualProfile::update(Visitor V, const Function *F) { - GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U; +void PGOContextualProfile::initIndex() { + // Initialize the head of the index list for each function. We don't need it + // after this point. + DenseMap InsertionPoints; + for (auto &[Guid, FI] : FuncInfo) + InsertionPoints[Guid] = &FI.Index; preorderVisit( - *Profiles, V, G); + *Profiles, [&](PGOCtxProfContext &Ctx) { + auto InsertIt = InsertionPoints.find(Ctx.guid()); + if (InsertIt == InsertionPoints.end()) + return; + // Insert at the end of the list. Since we traverse in preorder, it + // means that when we iterate the list from the beginning, we'd + // encounter the contexts in the order we would have, should we have + // performed a full preorder traversal. + InsertIt->second->Next = &Ctx; + Ctx.Previous = InsertIt->second; + InsertIt->second = &Ctx; + }); +} + +void PGOContextualProfile::update(Visitor V, const Function &F) { + assert(isFunctionKnown(F)); + GlobalValue::GUID G = getDefinedFunctionGUID(F); + for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node; + Node = Node->Next) + V(*reinterpret_cast(Node)); } void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const { - GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U; - preorderVisit(*Profiles, V, G); + if (!F) + return preorderVisit(*Profiles, V); + assert(isFunctionKnown(*F)); + GlobalValue::GUID G = getDefinedFunctionGUID(*F); + for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node; + Node = Node->Next) + V(*reinterpret_cast(Node)); } const CtxProfFlatProfile PGOContextualProfile::flatten() const { diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index 3d2fa226ff15b9..4ead5cdcf29c01 100644 --- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -655,7 +655,7 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee, Ctx.counters()[IndirectID] = IndirectCount; }; - CtxProf.update(ProfileUpdater, &Caller); + CtxProf.update(ProfileUpdater, Caller); return &DirectCall; } diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 25a01558165da0..671b0d0822a5d9 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -2375,7 +2375,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, assert(Deleted); (void)Deleted; }; - CtxProf.update(Updater, &Caller); + CtxProf.update(Updater, Caller); return Ret; }