Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SampleFDO] Stale profile call-graph matching #95135

Merged
merged 25 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
622d78f
[SampleFDO] Stale profile renaming matching
wlei-llvm May 4, 2024
c7ada8b
addressing comments
wlei-llvm May 20, 2024
183f6ae
fix test & udpate non-inline call targets & addressing comments
wlei-llvm May 23, 2024
6beddf2
addressing comments
wlei-llvm May 29, 2024
7f38b7e
change ProfCallee to ProfFunc
wlei-llvm May 30, 2024
ae436dd
early break for non-inline callees match when NewIRCallees is empty
wlei-llvm May 30, 2024
7cb0cd7
refactoring findOrMatchFunction
wlei-llvm May 30, 2024
beaa601
addressing comments
wlei-llvm Jun 3, 2024
c61ee03
fix comment
wlei-llvm Jun 4, 2024
81811c3
run matching in top-down order and along with CFG matching
wlei-llvm Jun 9, 2024
4bdda81
add stats
wlei-llvm Jun 10, 2024
df77394
FunctionProfileNameMap to FuncToProfileNameMap and fix lint
wlei-llvm Jun 10, 2024
d001406
fix varibale name
wlei-llvm Jun 11, 2024
8fc8f54
fix typo and incorrect comments
wlei-llvm Jun 12, 2024
ecc4000
addressing comment: build its own top-down function for matcher
wlei-llvm Jun 14, 2024
da8b752
addressing comment
wlei-llvm Jun 17, 2024
93d70fa
addressing feedback
wlei-llvm Jun 21, 2024
8c36fcc
add test for recursive case
wlei-llvm Jun 22, 2024
dc4d4f9
renaming and fix comments
wlei-llvm Jun 24, 2024
c7ae1a9
Merge branch 'main' into call-graph-matching
wlei-llvm Jun 24, 2024
7b458b4
rename to FunctionsWithoutProfile
wlei-llvm Jun 24, 2024
15e8d0c
refactor functionHasProfile
wlei-llvm Jun 25, 2024
47c1816
fix use-after-free
wlei-llvm Jul 8, 2024
82cf2a6
Merge branch 'main' into call-graph-matching
wlei-llvm Jul 8, 2024
845ebe3
Merge branch 'llvm:main' into call-graph-matching
wlei-llvm Jul 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions llvm/include/llvm/ProfileData/SampleProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -919,12 +919,14 @@ class FunctionSamples {
/// Returns a pointer to FunctionSamples at the given callsite location
/// \p Loc with callee \p CalleeName. If no callsite can be found, relax
/// the restriction to return the FunctionSamples at callsite location
/// \p Loc with the maximum total sample count. If \p Remapper is not
/// nullptr, use \p Remapper to find FunctionSamples with equivalent name
/// as \p CalleeName.
const FunctionSamples *
findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName,
SampleProfileReaderItaniumRemapper *Remapper) const;
/// \p Loc with the maximum total sample count. If \p Remapper or \p
/// FuncNameToProfNameMap is not nullptr, use them to find FunctionSamples
/// with equivalent name as \p CalleeName.
const FunctionSamples *findFunctionSamplesAt(
const LineLocation &Loc, StringRef CalleeName,
SampleProfileReaderItaniumRemapper *Remapper,
const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
*FuncNameToProfNameMap = nullptr) const;

bool empty() const { return TotalSamples == 0; }

Expand Down Expand Up @@ -1172,11 +1174,14 @@ class FunctionSamples {
/// tree nodes in the profile.
///
/// \returns the FunctionSamples pointer to the inlined instance.
/// If \p Remapper is not nullptr, it will be used to find matching
/// FunctionSamples with not exactly the same but equivalent name.
/// If \p Remapper or \p FuncNameToProfNameMap is not nullptr, it will be used
/// to find matching FunctionSamples with not exactly the same but equivalent
/// name.
const FunctionSamples *findFunctionSamples(
const DILocation *DIL,
SampleProfileReaderItaniumRemapper *Remapper = nullptr) const;
SampleProfileReaderItaniumRemapper *Remapper = nullptr,
const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
*FuncNameToProfNameMap = nullptr) const;

static bool ProfileIsProbeBased;

Expand Down
115 changes: 101 additions & 14 deletions llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ using AnchorMap = std::map<LineLocation, FunctionId>;
class SampleProfileMatcher {
Module &M;
SampleProfileReader &Reader;
LazyCallGraph &CG;
const PseudoProbeManager *ProbeManager;
const ThinOrFullLTOPhase LTOPhase;
SampleProfileMap FlattenedProfiles;
Expand Down Expand Up @@ -58,6 +59,40 @@ class SampleProfileMatcher {
StringMap<std::unordered_map<LineLocation, MatchState, LineLocationHash>>
FuncCallsiteMatchStates;

struct FuncToProfileNameMapHash {
uint64_t
operator()(const std::pair<const Function *, FunctionId> &P) const {
return hash_combine(P.first, P.second);
}
};
// A map from a pair of function and profile name to a boolean value
// indicating whether they are matched. This is used as a cache for the
// matching result.
std::unordered_map<std::pair<const Function *, FunctionId>, bool,
FuncToProfileNameMapHash>
FuncProfileMatchCache;
// The new functions found by the call graph matching. The map's key is the
// new (renamed) function pointer and the value is the old (unused) profile
// name.
std::unordered_map<Function *, FunctionId> FuncToProfileNameMap;

// A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
// which maps the function name to the matched profile name. This is used
// for sample loader to look up profile using the new name.
HashKeyMap<std::unordered_map, FunctionId, FunctionId> *FuncNameToProfNameMap;

// A map pointer to the SymbolMap in SampleProfileLoader, which stores all
// the original matched symbols before the matching. This is to determine if
// the profile is unused (to be matched) or not.
HashKeyMap<std::unordered_map, FunctionId, Function *> *SymbolMap;

// The new functions from IR.
HashKeyMap<std::unordered_map, FunctionId, Function *>
FunctionsWithoutProfile;

// Pointer to the Profile Symbol List in the reader.
std::shared_ptr<ProfileSymbolList> PSL;

// Profile mismatch statistics:
uint64_t TotalProfiledFunc = 0;
// Num of checksum-mismatched function.
Expand All @@ -72,34 +107,61 @@ class SampleProfileMatcher {
uint64_t MismatchedCallsiteSamples = 0;
uint64_t RecoveredCallsiteSamples = 0;

// Profile call-graph matching statistics:
uint64_t NumCallGraphRecoveredProfiledFunc = 0;
uint64_t NumCallGraphRecoveredFuncSamples = 0;

// A dummy name for unknown indirect callee, used to differentiate from a
// non-call instruction that also has an empty callee name.
static constexpr const char *UnknownIndirectCallee =
"unknown.indirect.callee";

public:
SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
const PseudoProbeManager *ProbeManager,
ThinOrFullLTOPhase LTOPhase)
: M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase){};
SampleProfileMatcher(
Module &M, SampleProfileReader &Reader, LazyCallGraph &CG,
const PseudoProbeManager *ProbeManager, ThinOrFullLTOPhase LTOPhase,
HashKeyMap<std::unordered_map, FunctionId, Function *> &SymMap,
std::shared_ptr<ProfileSymbolList> PSL,
HashKeyMap<std::unordered_map, FunctionId, FunctionId>
&FuncNameToProfNameMap)
: M(M), Reader(Reader), CG(CG), ProbeManager(ProbeManager),
LTOPhase(LTOPhase), FuncNameToProfNameMap(&FuncNameToProfNameMap),
SymbolMap(&SymMap), PSL(PSL) {};
void runOnModule();
void clearMatchingData() {
// Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
// will be used for sample loader.
FuncCallsiteMatchStates.clear();
// Do not clear FlattenedProfiles as it contains function names referenced
// by FuncNameToProfNameMap. Clearing this memory could lead to a
// use-after-free error.
freeContainer(FuncCallsiteMatchStates);
freeContainer(FunctionsWithoutProfile);
freeContainer(FuncToProfileNameMap);
}

private:
FunctionSamples *getFlattenedSamplesFor(const Function &F) {
StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
auto It = FlattenedProfiles.find(FunctionId(CanonFName));
// Look up the flattened profile stored under the profile name \p Fname.
// Returns a pointer to the entry held in FlattenedProfiles, or nullptr when
// no entry exists for that name.
FunctionSamples *getFlattenedSamplesFor(const FunctionId &Fname) {
  auto Pos = FlattenedProfiles.find(Fname);
  // Bail out early on a miss so the hit path is the straight-line case.
  if (Pos == FlattenedProfiles.end())
    return nullptr;
  return &Pos->second;
}
// Convenience overload for an IR function: canonicalize the name of \p F and
// delegate to the FunctionId-based lookup. Returns whatever that lookup
// returns (nullptr when there is no flattened profile for the name).
FunctionSamples *getFlattenedSamplesFor(const Function &F) {
  const StringRef CanonicalName = FunctionSamples::getCanonicalFnName(F);
  const FunctionId ProfileKey(CanonicalName);
  return getFlattenedSamplesFor(ProfileKey);
}
// Reset \p C to a default-constructed state by swapping it with a fresh
// default-constructed object of the same type. The previous contents end up
// in the local object and are destroyed with it when this function returns.
template <typename T> inline void freeContainer(T &C) {
  T Fresh;
  std::swap(Fresh, C);
}
void getFilteredAnchorList(const AnchorMap &IRAnchors,
const AnchorMap &ProfileAnchors,
AnchorList &FilteredIRAnchorsList,
AnchorList &FilteredProfileAnchorList);
void runOnFunction(Function &F);
void findIRAnchors(const Function &F, AnchorMap &IRAnchors);
void findProfileAnchors(const FunctionSamples &FS, AnchorMap &ProfileAnchors);
void findIRAnchors(const Function &F, AnchorMap &IRAnchors) const;
void findProfileAnchors(const FunctionSamples &FS,
AnchorMap &ProfileAnchors) const;
// Record the callsite match states for profile staleness report, the result
// is saved in FuncCallsiteMatchStates.
void recordCallsiteMatchStates(const Function &F, const AnchorMap &IRAnchors,
Expand All @@ -124,6 +186,9 @@ class SampleProfileMatcher {
State == MatchState::RemovedMatch;
};

void countCallGraphRecoveredSamples(
const FunctionSamples &FS,
std::unordered_set<FunctionId> &MatchedUnusedProfile);
// Count the samples of checksum mismatched function for the top-level
// function and all inlinees.
void countMismatchedFuncSamples(const FunctionSamples &FS, bool IsTopLevel);
Expand Down Expand Up @@ -151,15 +216,37 @@ class SampleProfileMatcher {
// parts from the resulting SES are used to remap the IR locations to the
// profile locations. As the number of function callsite is usually not big,
// we currently just implement the basic greedy version (page 6 of the paper).
LocToLocMap
longestCommonSequence(const AnchorList &IRCallsiteAnchors,
const AnchorList &ProfileCallsiteAnchors) const;
LocToLocMap longestCommonSequence(const AnchorList &IRCallsiteAnchors,
const AnchorList &ProfileCallsiteAnchors,
bool MatchUnusedFunction);
void matchNonCallsiteLocs(const LocToLocMap &AnchorMatchings,
const AnchorMap &IRAnchors,
LocToLocMap &IRToProfileLocationMap);
void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors,
const AnchorMap &ProfileAnchors,
LocToLocMap &IRToProfileLocationMap);
LocToLocMap &IRToProfileLocationMap,
bool RunCFGMatching, bool RunCGMatching);
// If the function doesn't have profile, return the pointer to the function.
bool functionHasProfile(const FunctionId &IRFuncName,
Function *&FuncWithoutProfile);
bool isProfileUnused(const FunctionId &ProfileFuncName);
bool functionMatchesProfileHelper(const Function &IRFunc,
const FunctionId &ProfFunc);
// Determine if the function matches profile. If FindMatchedProfileOnly is
// set, only search the existing matched function. Otherwise, try matching the
// two functions.
bool functionMatchesProfile(const FunctionId &IRFuncName,
const FunctionId &ProfileFuncName,
bool FindMatchedProfileOnly);
// Determine if the function matches profile by computing a similarity ratio
// between two sequences of callsite anchors extracted from function and
// profile. If it's above the threshold, the function matches the profile.
bool functionMatchesProfile(Function &IRFunc, const FunctionId &ProfFunc,
bool FindMatchedProfileOnly);
// Find functions that don't show in the profile or profile symbol list,
// which are supposed to be new functions. We use them as the targets for
// call graph matching.
void findFunctionsWithoutProfile();
void reportOrPersistProfileStats();
};
} // end namespace llvm
Expand Down
17 changes: 17 additions & 0 deletions llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
Expand Down Expand Up @@ -155,6 +156,22 @@ static inline bool skipProfileForFunction(const Function &F) {
return F.isDeclaration() || !F.hasFnAttribute("use-sample-profile");
}

static inline void
buildTopDownFuncOrder(LazyCallGraph &CG,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function actually yields bottom-up order.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed the name

std::vector<Function *> &FunctionOrderList) {
CG.buildRefSCCs();
for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
for (LazyCallGraph::SCC &C : RC) {
for (LazyCallGraph::Node &N : C) {
Function &F = N.getFunction();
if (!skipProfileForFunction(F))
FunctionOrderList.push_back(&F);
}
}
}
std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
}

template <typename FT> class SampleProfileLoaderBaseImpl {
public:
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName,
Expand Down
36 changes: 26 additions & 10 deletions llvm/lib/ProfileData/SampleProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,9 @@ LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL,
}

const FunctionSamples *FunctionSamples::findFunctionSamples(
const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper) const {
const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper,
const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
*FuncNameToProfNameMap) const {
assert(DIL);
SmallVector<std::pair<LineLocation, StringRef>, 10> S;

Expand All @@ -256,7 +258,8 @@ const FunctionSamples *FunctionSamples::findFunctionSamples(
return this;
const FunctionSamples *FS = this;
for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper);
FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper,
FuncNameToProfNameMap);
}
return FS;
}
Expand All @@ -277,19 +280,32 @@ void FunctionSamples::findAllNames(DenseSet<FunctionId> &NameSet) const {

const FunctionSamples *FunctionSamples::findFunctionSamplesAt(
const LineLocation &Loc, StringRef CalleeName,
SampleProfileReaderItaniumRemapper *Remapper) const {
SampleProfileReaderItaniumRemapper *Remapper,
const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
*FuncNameToProfNameMap) const {
CalleeName = getCanonicalFnName(CalleeName);

auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
if (iter == CallsiteSamples.end())
auto I = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
if (I == CallsiteSamples.end())
return nullptr;
auto FS = iter->second.find(getRepInFormat(CalleeName));
if (FS != iter->second.end())
auto FS = I->second.find(getRepInFormat(CalleeName));
if (FS != I->second.end())
return &FS->second;

if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) {
auto R = FuncNameToProfNameMap->find(FunctionId(CalleeName));
if (R != FuncNameToProfNameMap->end()) {
CalleeName = R->second.stringRef();
auto FS = I->second.find(getRepInFormat(CalleeName));
if (FS != I->second.end())
return &FS->second;
}
}

if (Remapper) {
if (auto NameInProfile = Remapper->lookUpNameInProfile(CalleeName)) {
auto FS = iter->second.find(getRepInFormat(*NameInProfile));
if (FS != iter->second.end())
auto FS = I->second.find(getRepInFormat(*NameInProfile));
if (FS != I->second.end())
return &FS->second;
}
}
Expand All @@ -300,7 +316,7 @@ const FunctionSamples *FunctionSamples::findFunctionSamplesAt(
return nullptr;
uint64_t MaxTotalSamples = 0;
const FunctionSamples *R = nullptr;
for (const auto &NameFS : iter->second)
for (const auto &NameFS : I->second)
if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {
MaxTotalSamples = NameFS.second.getTotalSamples();
R = &NameFS.second;
Expand Down
Loading