Skip to content

Commit

Permalink
[nfc][ctx_prof] Efficient profile traversal and update (#110052)
Browse files Browse the repository at this point in the history
This optimizes profile updates and visits in the cases where we want to access the contexts of a specific function, which covers all the current update cases. We do so by maintaining, for each function, a list of its contexts that preserves preorder traversal order. The list is updated whenever contexts are `std::move`-d or deleted.
  • Loading branch information
mtrofin authored Sep 27, 2024
1 parent 1fd1f65 commit c4952e5
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 16 deletions.
6 changes: 4 additions & 2 deletions llvm/include/llvm/Analysis/CtxProfAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class PGOContextualProfile {
uint32_t NextCounterIndex = 0;
uint32_t NextCallsiteIndex = 0;
const std::string Name;

PGOCtxProfContext Index;
FunctionInfo(StringRef Name) : Name(Name) {}
};
std::optional<PGOCtxProfContext::CallTargetMapTy> Profiles;
Expand All @@ -50,6 +50,8 @@ class PGOContextualProfile {
// its state piecemeal.
PGOContextualProfile() = default;

void initIndex();

public:
PGOContextualProfile(const PGOContextualProfile &) = delete;
PGOContextualProfile(PGOContextualProfile &&) = default;
Expand Down Expand Up @@ -94,7 +96,7 @@ class PGOContextualProfile {
using ConstVisitor = function_ref<void(const PGOCtxProfContext &)>;
using Visitor = function_ref<void(PGOCtxProfContext &)>;

void update(Visitor, const Function *F = nullptr);
void update(Visitor, const Function &F);
void visit(ConstVisitor, const Function *F = nullptr) const;

const CtxProfFlatProfile flatten() const;
Expand Down
62 changes: 60 additions & 2 deletions llvm/include/llvm/ProfileData/PGOCtxProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,74 @@
#include <map>

namespace llvm {
class PGOContextualProfile;
class PGOCtxProfContext;

namespace internal {
// When we traverse the contextual profile, we typically want to visit contexts
// pertaining to a specific function. To avoid traversing the whole tree, we
// want to keep a per-function list - which will be in preorder - of that
// function's contexts. This happens in PGOContextualProfile. For memory use
// efficiency, we want to make PGOCtxProfContext an intrusive doubly-linked list
// node. We need to handle the cases where PGOCtxProfContext nodes are moved and
// deleted: in both cases, we need to update the index (==list). We can do that
// directly from the node in the list, without knowing who the "parent" of the
// list is. That makes the ADT ilist overkill here. Finally, IndexNode is meant
// to be an implementation detail of PGOCtxProfContext, and the only reason it's
// factored out is to avoid implementing move semantics for all its members.
class IndexNode {
  // This class' members are intentionally private - it's a convenience
  // implementation detail.
  friend class ::llvm::PGOCtxProfContext;
  friend class ::llvm::PGOContextualProfile;

  // Neighbors in the intrusive list. A null pointer means there is no
  // neighbor on that side (end of the list, or the node is not linked).
  IndexNode *Previous = nullptr;
  IndexNode *Next = nullptr;

  // Unlink this node by making its neighbors point at each other.
  ~IndexNode() {
    if (Next)
      Next->Previous = Previous;
    if (Previous)
      Previous->Next = Next;
  }

  // A node occupies exactly one position in one list, so it cannot be copied;
  // assignment is not supported either (spelled out rather than relying on
  // the implicit suppression caused by the user-declared move constructor).
  IndexNode(const IndexNode &Other) = delete;
  IndexNode &operator=(const IndexNode &) = delete;
  IndexNode &operator=(IndexNode &&) = delete;

  // Take over Other's position in the list. Only pointers are rewired, so
  // this cannot throw.
  IndexNode(IndexNode &&Other) noexcept
      : Previous(Other.Previous), Next(Other.Next) {
    // Update the neighbors to point to this object
    if (Next)
      Next->Previous = this;
    if (Previous)
      Previous->Next = this;

    // Make sure Other's dtor is a noop
    Other.Next = nullptr;
    Other.Previous = nullptr;
  }

  // A default-constructed node is not linked to anything.
  IndexNode() = default;
};
} // namespace internal

/// A node (context) in the loaded contextual profile, suitable for mutation
/// during IPO passes. We generally expect a fraction of counters and
/// callsites to be populated. We continue to model counters as vectors, but
/// callsites are modeled as a map of a map. The expectation is that, typically,
/// there is a small number of indirect targets (usually, 1 for direct calls);
/// but potentially a large number of callsites, and, as inlining progresses,
/// the callsite count of a caller will grow.
class PGOCtxProfContext final {
class PGOCtxProfContext final : public internal::IndexNode {
public:
using CallTargetMapTy = std::map<GlobalValue::GUID, PGOCtxProfContext>;
using CallsiteMapTy = std::map<uint32_t, CallTargetMapTy>;

private:
friend class PGOCtxProfileReader;
friend class PGOContextualProfile;

GlobalValue::GUID GUID = 0;
SmallVector<uint64_t, 16> Counters;
CallsiteMapTy Callsites;
Expand All @@ -47,11 +101,15 @@ class PGOCtxProfContext final {
getOrEmplace(uint32_t Index, GlobalValue::GUID G,
SmallVectorImpl<uint64_t> &&Counters);

// Create a bogus context object, used only as the anchor (head) of the index
// doubly-linked list - see IndexNode
PGOCtxProfContext() = default;

public:
PGOCtxProfContext(const PGOCtxProfContext &) = delete;
PGOCtxProfContext &operator=(const PGOCtxProfContext &) = delete;
PGOCtxProfContext(PGOCtxProfContext &&) = default;
PGOCtxProfContext &operator=(PGOCtxProfContext &&) = default;
PGOCtxProfContext &operator=(PGOCtxProfContext &&) = delete;

GlobalValue::GUID guid() const { return GUID; }
const SmallVectorImpl<uint64_t> &counters() const { return Counters; }
Expand Down
47 changes: 37 additions & 10 deletions llvm/lib/Analysis/CtxProfAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ PGOContextualProfile CtxProfAnalysis::run(Module &M,
// If we made it this far, the Result is valid - which we mark by setting
// .Profiles.
Result.Profiles = std::move(*MaybeCtx);
Result.initIndex();
return Result;
}

Expand Down Expand Up @@ -265,11 +266,9 @@ CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {

template <class ProfilesTy, class ProfTy>
static void preorderVisit(ProfilesTy &Profiles,
function_ref<void(ProfTy &)> Visitor,
GlobalValue::GUID Match = 0) {
function_ref<void(ProfTy &)> Visitor) {
std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
if (!Match || Ctx.guid() == Match)
Visitor(Ctx);
Visitor(Ctx);
for (auto &[_, SubCtxSet] : Ctx.callsites())
for (auto &[__, Subctx] : SubCtxSet)
Traverser(Subctx);
Expand All @@ -278,16 +277,44 @@ static void preorderVisit(ProfilesTy &Profiles,
Traverser(P);
}

void PGOContextualProfile::update(Visitor V, const Function *F) {
GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U;
void PGOContextualProfile::initIndex() {
// Initialize the head of the index list for each function. We don't need it
// after this point.
DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
for (auto &[Guid, FI] : FuncInfo)
InsertionPoints[Guid] = &FI.Index;
preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(
*Profiles, V, G);
*Profiles, [&](PGOCtxProfContext &Ctx) {
auto InsertIt = InsertionPoints.find(Ctx.guid());
if (InsertIt == InsertionPoints.end())
return;
// Insert at the end of the list. Since we traverse in preorder, it
// means that when we iterate the list from the beginning, we'd
// encounter the contexts in the order we would have, should we have
// performed a full preorder traversal.
InsertIt->second->Next = &Ctx;
Ctx.Previous = InsertIt->second;
InsertIt->second = &Ctx;
});
}

/// Invoke \p V on each context linked into the index list of \p F, walking
/// the list from its head. \p F is expected to be known to this profile (see
/// the assert); the FuncInfo lookup result is dereferenced without a check.
void PGOContextualProfile::update(Visitor V, const Function &F) {
  assert(isFunctionKnown(F));
  GlobalValue::GUID G = getDefinedFunctionGUID(F);
  // FunctionInfo::Index is only the list anchor, so iteration starts at its
  // successor. Node->Next is deliberately read after V returns, so any
  // relinking done while V runs is observed by the next step.
  for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
       Node = Node->Next)
    // Node is an internal::IndexNode *, a public non-virtual base of
    // PGOCtxProfContext, so static_cast is the appropriate downcast.
    V(*static_cast<PGOCtxProfContext *>(Node));
}

void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U;
preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
const PGOCtxProfContext>(*Profiles, V, G);
if (!F)
return preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
const PGOCtxProfContext>(*Profiles, V);
assert(isFunctionKnown(*F));
GlobalValue::GUID G = getDefinedFunctionGUID(*F);
for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
Node = Node->Next)
V(*reinterpret_cast<const PGOCtxProfContext *>(Node));
}

const CtxProfFlatProfile PGOContextualProfile::flatten() const {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee,
Ctx.counters()[IndirectID] = IndirectCount;

};
CtxProf.update(ProfileUpdater, &Caller);
CtxProf.update(ProfileUpdater, Caller);
return &DirectCall;
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Utils/InlineFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2375,7 +2375,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
assert(Deleted);
(void)Deleted;
};
CtxProf.update(Updater, &Caller);
CtxProf.update(Updater, Caller);
return Ret;
}

Expand Down

0 comments on commit c4952e5

Please sign in to comment.