diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 5c2a78c14efd0f..e7b154dff06971 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -919,12 +919,14 @@ class FunctionSamples { /// Returns a pointer to FunctionSamples at the given callsite location /// \p Loc with callee \p CalleeName. If no callsite can be found, relax /// the restriction to return the FunctionSamples at callsite location - /// \p Loc with the maximum total sample count. If \p Remapper is not - /// nullptr, use \p Remapper to find FunctionSamples with equivalent name - /// as \p CalleeName. - const FunctionSamples * - findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, - SampleProfileReaderItaniumRemapper *Remapper) const; + /// \p Loc with the maximum total sample count. If \p Remapper or \p + /// FuncNameToProfNameMap is not nullptr, use them to find FunctionSamples + /// with equivalent name as \p CalleeName. + const FunctionSamples *findFunctionSamplesAt( + const LineLocation &Loc, StringRef CalleeName, + SampleProfileReaderItaniumRemapper *Remapper, + const HashKeyMap + *FuncNameToProfNameMap = nullptr) const; bool empty() const { return TotalSamples == 0; } @@ -1172,11 +1174,14 @@ class FunctionSamples { /// tree nodes in the profile. /// /// \returns the FunctionSamples pointer to the inlined instance. - /// If \p Remapper is not nullptr, it will be used to find matching - /// FunctionSamples with not exactly the same but equivalent name. + /// If \p Remapper or \p FuncNameToProfNameMap is not nullptr, it will be used + /// to find matching FunctionSamples with not exactly the same but equivalent + /// name. 
const FunctionSamples *findFunctionSamples( const DILocation *DIL, - SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; + SampleProfileReaderItaniumRemapper *Remapper = nullptr, + const HashKeyMap + *FuncNameToProfNameMap = nullptr) const; static bool ProfileIsProbeBased; diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h index b6feca5d470355..a67f158433391c 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h @@ -26,6 +26,7 @@ using AnchorMap = std::map; class SampleProfileMatcher { Module &M; SampleProfileReader &Reader; + LazyCallGraph &CG; const PseudoProbeManager *ProbeManager; const ThinOrFullLTOPhase LTOPhase; SampleProfileMap FlattenedProfiles; @@ -58,6 +59,40 @@ class SampleProfileMatcher { StringMap> FuncCallsiteMatchStates; + struct FuncToProfileNameMapHash { + uint64_t + operator()(const std::pair &P) const { + return hash_combine(P.first, P.second); + } + }; + // A map from a pair of function and profile name to a boolean value + // indicating whether they are matched. This is used as a cache for the + // matching result. + std::unordered_map, bool, + FuncToProfileNameMapHash> + FuncProfileMatchCache; + // The new functions found by the call graph matching. The map's key is the + // new (renamed) function pointer and the value is the old (unused) profile + // name. + std::unordered_map FuncToProfileNameMap; + + // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader, + // which maps the function name to the matched profile name. This is used + // for sample loader to look up profile using the new name. + HashKeyMap *FuncNameToProfNameMap; + + // A map pointer to the SymbolMap in SampleProfileLoader, which stores all + // the original matched symbols before the matching. This is used to determine if + // the profile is unused(to be matched) or not. 
+ HashKeyMap *SymbolMap; + + // The new functions from IR. + HashKeyMap + FunctionsWithoutProfile; + + // Pointer to the Profile Symbol List in the reader. + std::shared_ptr PSL; + // Profile mismatch statstics: uint64_t TotalProfiledFunc = 0; // Num of checksum-mismatched function. @@ -72,34 +107,61 @@ class SampleProfileMatcher { uint64_t MismatchedCallsiteSamples = 0; uint64_t RecoveredCallsiteSamples = 0; + // Profile call-graph matching statistics: + uint64_t NumCallGraphRecoveredProfiledFunc = 0; + uint64_t NumCallGraphRecoveredFuncSamples = 0; + // A dummy name for unknown indirect callee, used to differentiate from a // non-call instruction that also has an empty callee name. static constexpr const char *UnknownIndirectCallee = "unknown.indirect.callee"; public: - SampleProfileMatcher(Module &M, SampleProfileReader &Reader, - const PseudoProbeManager *ProbeManager, - ThinOrFullLTOPhase LTOPhase) - : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase){}; + SampleProfileMatcher( + Module &M, SampleProfileReader &Reader, LazyCallGraph &CG, + const PseudoProbeManager *ProbeManager, ThinOrFullLTOPhase LTOPhase, + HashKeyMap &SymMap, + std::shared_ptr PSL, + HashKeyMap + &FuncNameToProfNameMap) + : M(M), Reader(Reader), CG(CG), ProbeManager(ProbeManager), + LTOPhase(LTOPhase), FuncNameToProfNameMap(&FuncNameToProfNameMap), + SymbolMap(&SymMap), PSL(PSL) {}; void runOnModule(); void clearMatchingData() { // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which // will be used for sample loader. - FuncCallsiteMatchStates.clear(); + // Do not clear FlattenedProfiles as it contains function names referenced + // by FuncNameToProfNameMap. Clearing this memory could lead to a + // use-after-free error. 
+ freeContainer(FuncCallsiteMatchStates); + freeContainer(FunctionsWithoutProfile); + freeContainer(FuncToProfileNameMap); } private: - FunctionSamples *getFlattenedSamplesFor(const Function &F) { - StringRef CanonFName = FunctionSamples::getCanonicalFnName(F); - auto It = FlattenedProfiles.find(FunctionId(CanonFName)); + FunctionSamples *getFlattenedSamplesFor(const FunctionId &Fname) { + auto It = FlattenedProfiles.find(Fname); if (It != FlattenedProfiles.end()) return &It->second; return nullptr; } + FunctionSamples *getFlattenedSamplesFor(const Function &F) { + StringRef CanonFName = FunctionSamples::getCanonicalFnName(F); + return getFlattenedSamplesFor(FunctionId(CanonFName)); + } + template inline void freeContainer(T &C) { + T Empty; + std::swap(C, Empty); + } + void getFilteredAnchorList(const AnchorMap &IRAnchors, + const AnchorMap &ProfileAnchors, + AnchorList &FilteredIRAnchorsList, + AnchorList &FilteredProfileAnchorList); void runOnFunction(Function &F); - void findIRAnchors(const Function &F, AnchorMap &IRAnchors); - void findProfileAnchors(const FunctionSamples &FS, AnchorMap &ProfileAnchors); + void findIRAnchors(const Function &F, AnchorMap &IRAnchors) const; + void findProfileAnchors(const FunctionSamples &FS, + AnchorMap &ProfileAnchors) const; // Record the callsite match states for profile staleness report, the result // is saved in FuncCallsiteMatchStates. void recordCallsiteMatchStates(const Function &F, const AnchorMap &IRAnchors, @@ -124,6 +186,9 @@ class SampleProfileMatcher { State == MatchState::RemovedMatch; }; + void countCallGraphRecoveredSamples( + const FunctionSamples &FS, + std::unordered_set &MatchedUnusedProfile); // Count the samples of checksum mismatched function for the top-level // function and all inlinees. 
void countMismatchedFuncSamples(const FunctionSamples &FS, bool IsTopLevel); @@ -151,15 +216,37 @@ class SampleProfileMatcher { // parts from the resulting SES are used to remap the IR locations to the // profile locations. As the number of function callsite is usually not big, // we currently just implements the basic greedy version(page 6 of the paper). - LocToLocMap - longestCommonSequence(const AnchorList &IRCallsiteAnchors, - const AnchorList &ProfileCallsiteAnchors) const; + LocToLocMap longestCommonSequence(const AnchorList &IRCallsiteAnchors, + const AnchorList &ProfileCallsiteAnchors, + bool MatchUnusedFunction); void matchNonCallsiteLocs(const LocToLocMap &AnchorMatchings, const AnchorMap &IRAnchors, LocToLocMap &IRToProfileLocationMap); void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, - LocToLocMap &IRToProfileLocationMap); + LocToLocMap &IRToProfileLocationMap, + bool RunCFGMatching, bool RunCGMatching); + // If the function doesn't have profile, return the pointer to the function. + bool functionHasProfile(const FunctionId &IRFuncName, + Function *&FuncWithoutProfile); + bool isProfileUnused(const FunctionId &ProfileFuncName); + bool functionMatchesProfileHelper(const Function &IRFunc, + const FunctionId &ProfFunc); + // Determine if the function matches profile. If FindMatchedProfileOnly is + // set, only search the existing matched function. Otherwise, try matching the + // two functions. + bool functionMatchesProfile(const FunctionId &IRFuncName, + const FunctionId &ProfileFuncName, + bool FindMatchedProfileOnly); + // Determine if the function matches profile by computing a similarity ratio + // between two sequences of callsite anchors extracted from function and + // profile. If it's above the threshold, the function matches the profile. 
+ bool functionMatchesProfile(Function &IRFunc, const FunctionId &ProfFunc, + bool FindMatchedProfileOnly); + // Find functions that don't show in the profile or profile symbol list, + // which are supposed to be new functions. We use them as the targets for + // call graph matching. + void findFunctionsWithoutProfile(); void reportOrPersistProfileStats(); }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index 7c725a3c1216cb..32bf7b8c96be3d 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" @@ -155,6 +156,22 @@ static inline bool skipProfileForFunction(const Function &F) { return F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"); } +static inline void +buildTopDownFuncOrder(LazyCallGraph &CG, + std::vector &FunctionOrderList) { + CG.buildRefSCCs(); + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { + for (LazyCallGraph::SCC &C : RC) { + for (LazyCallGraph::Node &N : C) { + Function &F = N.getFunction(); + if (!skipProfileForFunction(F)) + FunctionOrderList.push_back(&F); + } + } + } + std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); +} + template class SampleProfileLoaderBaseImpl { public: SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName, diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index 294f64636d989c..addb473faebdff 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -236,7 +236,9 @@ LineLocation 
FunctionSamples::getCallSiteIdentifier(const DILocation *DIL, } const FunctionSamples *FunctionSamples::findFunctionSamples( - const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper) const { + const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper, + const HashKeyMap + *FuncNameToProfNameMap) const { assert(DIL); SmallVector, 10> S; @@ -256,7 +258,8 @@ const FunctionSamples *FunctionSamples::findFunctionSamples( return this; const FunctionSamples *FS = this; for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) { - FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper); + FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper, + FuncNameToProfNameMap); } return FS; } @@ -277,19 +280,32 @@ void FunctionSamples::findAllNames(DenseSet &NameSet) const { const FunctionSamples *FunctionSamples::findFunctionSamplesAt( const LineLocation &Loc, StringRef CalleeName, - SampleProfileReaderItaniumRemapper *Remapper) const { + SampleProfileReaderItaniumRemapper *Remapper, + const HashKeyMap + *FuncNameToProfNameMap) const { CalleeName = getCanonicalFnName(CalleeName); - auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc)); - if (iter == CallsiteSamples.end()) + auto I = CallsiteSamples.find(mapIRLocToProfileLoc(Loc)); + if (I == CallsiteSamples.end()) return nullptr; - auto FS = iter->second.find(getRepInFormat(CalleeName)); - if (FS != iter->second.end()) + auto FS = I->second.find(getRepInFormat(CalleeName)); + if (FS != I->second.end()) return &FS->second; + + if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) { + auto R = FuncNameToProfNameMap->find(FunctionId(CalleeName)); + if (R != FuncNameToProfNameMap->end()) { + CalleeName = R->second.stringRef(); + auto FS = I->second.find(getRepInFormat(CalleeName)); + if (FS != I->second.end()) + return &FS->second; + } + } + if (Remapper) { if (auto NameInProfile = Remapper->lookUpNameInProfile(CalleeName)) { - auto FS = 
iter->second.find(getRepInFormat(*NameInProfile)); - if (FS != iter->second.end()) + auto FS = I->second.find(getRepInFormat(*NameInProfile)); + if (FS != I->second.end()) return &FS->second; } } @@ -300,7 +316,7 @@ const FunctionSamples *FunctionSamples::findFunctionSamplesAt( return nullptr; uint64_t MaxTotalSamples = 0; const FunctionSamples *R = nullptr; - for (const auto &NameFS : iter->second) + for (const auto &NameFS : I->second) if (NameFS.second.getTotalSamples() >= MaxTotalSamples) { MaxTotalSamples = NameFS.second.getTotalSamples(); R = &NameFS.second; diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 13c0e0d0abff0c..5cc2911a1a80eb 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -134,6 +134,10 @@ cl::opt SalvageStaleProfile( "salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query.")); +cl::opt + SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), + cl::desc("Salvage unused profile by matching with new " + "functions on call graph.")); cl::opt ReportProfileStaleness( "report-profile-staleness", cl::Hidden, cl::init(false), @@ -462,12 +466,13 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl { IntrusiveRefCntPtr FS, std::function GetAssumptionCache, std::function GetTargetTransformInfo, - std::function GetTLI) + std::function GetTLI, + LazyCallGraph &CG) : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName), std::move(FS)), GetAC(std::move(GetAssumptionCache)), GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), - LTOPhase(LTOPhase), + CG(CG), LTOPhase(LTOPhase), AnnotatedPassName(AnnotateSampleProfileInlinePhase ? 
llvm::AnnotateInlinePassName(InlineContext{ LTOPhase, InlinePass::SampleProfileInliner}) @@ -475,7 +480,7 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl { bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); bool runOnModule(Module &M, ModuleAnalysisManager *AM, - ProfileSummaryInfo *_PSI, LazyCallGraph &CG); + ProfileSummaryInfo *_PSI); protected: bool runOnFunction(Function &F, ModuleAnalysisManager *AM); @@ -527,9 +532,14 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl { /// is one-to-one mapping. HashKeyMap SymbolMap; + /// Map from function name to profile name generated by call-graph based + /// profile fuzzy matching(--salvage-unused-profile). + HashKeyMap FuncNameToProfNameMap; + std::function GetAC; std::function GetTTI; std::function GetTLI; + LazyCallGraph &CG; /// Profile tracker for different context. std::unique_ptr ContextTracker; @@ -544,7 +554,7 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl { /// Profle Symbol list tells whether a function name appears in the binary /// used to generate the current profile. - std::unique_ptr PSL; + std::shared_ptr PSL; /// Total number of samples collected in this profile. 
/// @@ -696,7 +706,8 @@ SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const { return nullptr; return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL), - CalleeName, Reader->getRemapper()); + CalleeName, Reader->getRemapper(), + &FuncNameToProfNameMap); } /// Returns a vector of FunctionSamples that are the indirect call targets @@ -774,8 +785,8 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { if (FunctionSamples::ProfileIsCS) it.first->second = ContextTracker->getContextSamplesFor(DIL); else - it.first->second = - Samples->findFunctionSamples(DIL, Reader->getRemapper()); + it.first->second = Samples->findFunctionSamples( + DIL, Reader->getRemapper(), &FuncNameToProfNameMap); } return it.first->second; } @@ -1923,20 +1934,9 @@ SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) { } ++CGI; } - } else { - CG.buildRefSCCs(); - for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { - for (LazyCallGraph::SCC &C : RC) { - for (LazyCallGraph::Node &N : C) { - Function &F = N.getFunction(); - if (!skipProfileForFunction(F)) - FunctionOrderList.push_back(&F); - } - } - } - } - - std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); + std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); + } else + buildTopDownFuncOrder(CG, FunctionOrderList); LLVM_DEBUG({ dbgs() << "Function processing order:\n"; @@ -2066,7 +2066,8 @@ bool SampleProfileLoader::doInitialization(Module &M, if (ReportProfileStaleness || PersistProfileStaleness || SalvageStaleProfile) { MatchingManager = std::make_unique( - M, *Reader, ProbeManager.get(), LTOPhase); + M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL, + FuncNameToProfNameMap); } return true; @@ -2136,8 +2137,7 @@ void SampleProfileLoader::removePseudoProbeInsts(Module &M) { } bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, - ProfileSummaryInfo *_PSI, - LazyCallGraph &CG) { + 
ProfileSummaryInfo *_PSI) { GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); PSI = _PSI; @@ -2182,14 +2182,18 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, } } } - assert(SymbolMap.count(FunctionId()) == 0 && - "No empty StringRef should be added in SymbolMap"); + // Stale profile matching. if (ReportProfileStaleness || PersistProfileStaleness || SalvageStaleProfile) { MatchingManager->runOnModule(); MatchingManager->clearMatchingData(); } + assert(SymbolMap.count(FunctionId()) == 0 && + "No empty StringRef should be added in SymbolMap"); + assert((SalvageUnusedProfile || FuncNameToProfNameMap.empty()) && + "FuncNameToProfNameMap is not empty when --salvage-unused-profile is " + "not enabled"); bool retval = false; for (auto *F : buildFunctionOrder(M, CG)) { @@ -2319,19 +2323,18 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, if (!FS) FS = vfs::getRealFileSystem(); + LazyCallGraph &CG = AM.getResult(M); SampleProfileLoader SampleLoader( ProfileFileName.empty() ? SampleProfileFile : ProfileFileName, ProfileRemappingFileName.empty() ? 
SampleProfileRemappingFile : ProfileRemappingFileName, - LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI); - + LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG); if (!SampleLoader.doInitialization(M, &FAM)) return PreservedAnalyses::all(); ProfileSummaryInfo *PSI = &AM.getResult(M); - LazyCallGraph &CG = AM.getResult(M); - if (!SampleLoader.runOnModule(M, &AM, PSI, CG)) + if (!SampleLoader.runOnModule(M, &AM, PSI)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp index 11368e3375bddd..312672e56b0170 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp @@ -21,7 +21,23 @@ using namespace sampleprof; #define DEBUG_TYPE "sample-profile-matcher" +static cl::opt FuncProfileSimilarityThreshold( + "func-profile-similarity-threshold", cl::Hidden, cl::init(80), + cl::desc("Consider a profile matches a function if the similarity of their " + "callee sequences is above the specified percentile.")); + +static cl::opt MinFuncCountForCGMatching( + "min-func-count-for-cg-matching", cl::Hidden, cl::init(5), + cl::desc("The minimum number of basic blocks required for a function to " + "run stale profile call graph matching.")); + +static cl::opt MinCallCountForCGMatching( + "min-call-count-for-cg-matching", cl::Hidden, cl::init(3), + cl::desc("The minimum number of call anchors required for a function to " + "run stale profile call graph matching.")); + extern cl::opt SalvageStaleProfile; +extern cl::opt SalvageUnusedProfile; extern cl::opt PersistProfileStaleness; extern cl::opt ReportProfileStaleness; @@ -31,7 +47,7 @@ static cl::opt SalvageStaleProfileMaxCallsites( "profile matching will be skipped.")); void SampleProfileMatcher::findIRAnchors(const Function &F, - AnchorMap &IRAnchors) { + AnchorMap &IRAnchors) const { // For inlined code, recover the original callsite and callee by 
finding the // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the // top-level frame is "main:1", the callsite is "1" and the callee is "foo". @@ -101,7 +117,7 @@ void SampleProfileMatcher::findIRAnchors(const Function &F, } void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS, - AnchorMap &ProfileAnchors) { + AnchorMap &ProfileAnchors) const { auto isInvalidLineOffset = [](uint32_t LineOffset) { return LineOffset & 0x8000; }; @@ -133,8 +149,44 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS, } } -LocToLocMap SampleProfileMatcher::longestCommonSequence( - const AnchorList &AnchorList1, const AnchorList &AnchorList2) const { +bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName, + Function *&FuncWithoutProfile) { + FuncWithoutProfile = nullptr; + auto R = FunctionsWithoutProfile.find(IRFuncName); + if (R != FunctionsWithoutProfile.end()) + FuncWithoutProfile = R->second; + return !FuncWithoutProfile; +} + +bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) { + return SymbolMap->find(ProfileFuncName) == SymbolMap->end(); +} + +bool SampleProfileMatcher::functionMatchesProfile( + const FunctionId &IRFuncName, const FunctionId &ProfileFuncName, + bool FindMatchedProfileOnly) { + if (IRFuncName == ProfileFuncName) + return true; + if (!SalvageUnusedProfile) + return false; + + // If IR function doesn't have profile and the profile is unused, try + // matching them. 
+ Function *IRFunc = nullptr; + if (functionHasProfile(IRFuncName, IRFunc) || + !isProfileUnused(ProfileFuncName)) + return false; + + assert(FunctionId(IRFunc->getName()) != ProfileFuncName && + "IR function should be different from profile function to match"); + return functionMatchesProfile(*IRFunc, ProfileFuncName, + FindMatchedProfileOnly); +} + +LocToLocMap +SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1, + const AnchorList &AnchorList2, + bool MatchUnusedFunction) { int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(), MaxDepth = Size1 + Size2; auto Index = [&](int32_t I) { return I + MaxDepth; }; @@ -195,7 +247,9 @@ LocToLocMap SampleProfileMatcher::longestCommonSequence( X = V[Index(K - 1)] + 1; Y = X - K; while (X < Size1 && Y < Size2 && - AnchorList1[X].second == AnchorList2[Y].second) + functionMatchesProfile( + AnchorList1[X].second, AnchorList2[Y].second, + !MatchUnusedFunction /* Find matched function only */)) X++, Y++; V[Index(K)] = X; @@ -266,6 +320,21 @@ void SampleProfileMatcher::matchNonCallsiteLocs( } } +// Filter the non-call locations from IRAnchors and ProfileAnchors and write +// them into a list for random access later. +void SampleProfileMatcher::getFilteredAnchorList( + const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors, + AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) { + for (const auto &I : IRAnchors) { + if (I.second.stringRef().empty()) + continue; + FilteredIRAnchorsList.emplace_back(I); + } + + for (const auto &I : ProfileAnchors) + FilteredProfileAnchorList.emplace_back(I); +} + // Call target name anchor based profile fuzzy matching. // Input: // For IR locations, the anchor is the callee name of direct callsite; For @@ -285,23 +354,19 @@ void SampleProfileMatcher::matchNonCallsiteLocs( // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9]. 
void SampleProfileMatcher::runStaleProfileMatching( const Function &F, const AnchorMap &IRAnchors, - const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap) { + const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap, + bool RunCFGMatching, bool RunCGMatching) { + if (!RunCFGMatching && !RunCGMatching) + return; LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName() << "\n"); assert(IRToProfileLocationMap.empty() && "Run stale profile matching only once per function"); AnchorList FilteredProfileAnchorList; - for (const auto &I : ProfileAnchors) - FilteredProfileAnchorList.emplace_back(I); - AnchorList FilteredIRAnchorsList; - // Filter the non-callsite from IRAnchors. - for (const auto &I : IRAnchors) { - if (I.second.stringRef().empty()) - continue; - FilteredIRAnchorsList.emplace_back(I); - } + getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, + FilteredProfileAnchorList); if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty()) return; @@ -317,14 +382,25 @@ void SampleProfileMatcher::runStaleProfileMatching( } // Match the callsite anchors by finding the longest common subsequence - // between IR and profile. Note that we need to use IR anchor as base(A side) - // to align with the order of IRToProfileLocationMap. + // between IR and profile. + // Define a match between two anchors as follows: + // 1) The function names of anchors are the same. + // 2) The similarity between the anchor functions is above a threshold if + // RunCGMatching is set. + // For 2), we only consider the anchor functions from IR and profile don't + // appear on either side to reduce the matching scope. Note that we need to + // use IR anchor as base(A side) to align with the order of + // IRToProfileLocationMap. 
LocToLocMap MatchedAnchors = - longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList); + longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, + RunCGMatching /* Match unused functions */); - // Match the non-callsite locations and write the result to + // CFG level matching: + // Apply the callsite matchings to infer matching for the basic + // block(non-callsite) locations and write the result to // IRToProfileLocationMap. - matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap); + if (RunCFGMatching) + matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap); } void SampleProfileMatcher::runOnFunction(Function &F) { @@ -335,6 +411,16 @@ void SampleProfileMatcher::runOnFunction(Function &F) { // the maximum number of callsites, we merge the function profiles from all // contexts, aka, the flattened profile to find profile anchors. const auto *FSFlattened = getFlattenedSamplesFor(F); + if (SalvageUnusedProfile && !FSFlattened) { + // Apply the matching in place to find the new function's matched profile. + // TODO: For extended profile format, if a function profile is unused and + // it's top-level, even if the profile is matched, it's not found in the + // profile. This is because sample reader only read the used profile at the + // beginning, we need to support loading the profile on-demand in future. + auto R = FuncToProfileNameMap.find(&F); + if (R != FuncToProfileNameMap.end()) + FSFlattened = getFlattenedSamplesFor(R->second); + } if (!FSFlattened) return; @@ -352,28 +438,31 @@ void SampleProfileMatcher::runOnFunction(Function &F) { if (ReportProfileStaleness || PersistProfileStaleness) recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr); - // For probe-based profiles, run matching only when the current profile is not - // valid. 
- if (SalvageStaleProfile && (!FunctionSamples::ProfileIsProbeBased || - !ProbeManager->profileIsValid(F, *FSFlattened))) { - // For imported functions, the checksum metadata(pseudo_probe_desc) are - // dropped, so we leverage function attribute(profile-checksum-mismatch) to - // transfer the info: add the attribute during pre-link phase and check it - // during post-link phase(see "profileIsValid"). - if (FunctionSamples::ProfileIsProbeBased && - LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) - F.addFnAttr("profile-checksum-mismatch"); - - // The matching result will be saved to IRToProfileLocationMap, create a - // new map for each function. - auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); - runStaleProfileMatching(F, IRAnchors, ProfileAnchors, - IRToProfileLocationMap); - // Find and update callsite match states after matching. - if (ReportProfileStaleness || PersistProfileStaleness) - recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, - &IRToProfileLocationMap); - } + if (!SalvageStaleProfile) + return; + // For probe-based profiles, run matching only when profile checksum is + // mismatched. + bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased && + !ProbeManager->profileIsValid(F, *FSFlattened); + bool RunCFGMatching = + !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch; + bool RunCGMatching = SalvageUnusedProfile; + // For imported functions, the checksum metadata(pseudo_probe_desc) are + // dropped, so we leverage function attribute(profile-checksum-mismatch) to + // transfer the info: add the attribute during pre-link phase and check it + // during post-link phase(see "profileIsValid"). + if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) + F.addFnAttr("profile-checksum-mismatch"); + + // The matching result will be saved to IRToProfileLocationMap, create a + // new map for each function. 
+ auto &IRToProfileLocationMap = getIRToProfileLocationMap(F); + runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap, + RunCFGMatching, RunCGMatching); + // Find and update callsite match states after matching. + if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness)) + recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, + &IRToProfileLocationMap); } void SampleProfileMatcher::recordCallsiteMatchStates( @@ -532,10 +621,35 @@ void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) { } } +void SampleProfileMatcher::countCallGraphRecoveredSamples( + const FunctionSamples &FS, + std::unordered_set &CallGraphRecoveredProfiles) { + if (CallGraphRecoveredProfiles.count(FS.getFunction())) { + NumCallGraphRecoveredFuncSamples += FS.getTotalSamples(); + return; + } + + for (const auto &CM : FS.getCallsiteSamples()) { + for (const auto &CS : CM.second) { + countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles); + } + } +} + void SampleProfileMatcher::computeAndReportProfileStaleness() { if (!ReportProfileStaleness && !PersistProfileStaleness) return; + std::unordered_set CallGraphRecoveredProfiles; + if (SalvageUnusedProfile) { + for (const auto &I : FuncToProfileNameMap) { + CallGraphRecoveredProfiles.insert(I.second); + if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage())) + continue; + NumCallGraphRecoveredProfiledFunc++; + } + } + // Count profile mismatches for profile staleness report. for (const auto &F : M) { if (skipProfileForFunction(F)) @@ -550,6 +664,9 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() { TotalProfiledFunc++; TotalFunctionSamples += FS->getTotalSamples(); + if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty()) + countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles); + // Checksum mismatch is only used in pseudo-probe mode. 
if (FunctionSamples::ProfileIsProbeBased) countMismatchedFuncSamples(*FS, true); @@ -566,6 +683,13 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() { << MismatchedFunctionSamples << "/" << TotalFunctionSamples << ") of samples are discarded due to function hash mismatch.\n"; } + if (SalvageUnusedProfile) { + errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/" + << TotalProfiledFunc << ") of functions' profile are matched and (" + << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples + << ") of samples are reused by call graph matching.\n"; + } + errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/" << TotalProfiledCallsites << ") of callsites' profile are invalid and (" @@ -592,6 +716,13 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() { ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples); } + if (SalvageUnusedProfile) { + ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc", + NumCallGraphRecoveredProfiledFunc); + ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples", + NumCallGraphRecoveredFuncSamples); + } + ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites); ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites); ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites); @@ -606,14 +737,161 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() { } } +void SampleProfileMatcher::findFunctionsWithoutProfile() { + // TODO: Support MD5 profile. + if (FunctionSamples::UseMD5) + return; + StringSet<> NamesInProfile; + if (auto NameTable = Reader.getNameTable()) { + for (auto Name : *NameTable) + NamesInProfile.insert(Name.stringRef()); + } + + for (auto &F : M) { + // Skip declarations, as even if the function can be matched, we have + // nothing to do with it. 
+ if (F.isDeclaration()) + continue; + + StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName()); + const auto *FS = getFlattenedSamplesFor(F); + if (FS) + continue; + + // For extended binary, functions fully inlined may not be loaded in the + // top-level profile, so check the NameTable which has all the symbol names + // in the profile. + if (NamesInProfile.count(CanonFName)) + continue; + + // For extended binary, non-profiled function symbols are in the profile + // symbol list table. + if (PSL && PSL->contains(CanonFName)) + continue; + + LLVM_DEBUG(dbgs() << "Function " << CanonFName + << " is not in profile or profile symbol list.\n"); + FunctionsWithoutProfile[FunctionId(CanonFName)] = &F; + } +} + +bool SampleProfileMatcher::functionMatchesProfileHelper( + const Function &IRFunc, const FunctionId &ProfFunc) { + // The value is in the range [0, 1]. The bigger the value is, the more similar + // the two sequences are. + float Similarity = 0.0; + + const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc); + if (!FSFlattened) + return false; + // The check for similarity or checksum may not be reliable if the function is + // tiny, we use the number of basic blocks as a proxy for the function + // complexity and skip the matching if it's too small. + if (IRFunc.size() < MinFuncCountForCGMatching || + FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching) + return false; + + // For probe-based function, we first trust the checksum info. If the checksum + // doesn't match, we continue checking for similarity. 
+ if (FunctionSamples::ProfileIsProbeBased) { + const auto *FuncDesc = ProbeManager->getDesc(IRFunc); + if (FuncDesc && + !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) { + LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName() + << "(IR) and " << ProfFunc << "(Profile) match.\n"); + + return true; + } + } + + AnchorMap IRAnchors; + findIRAnchors(IRFunc, IRAnchors); + AnchorMap ProfileAnchors; + findProfileAnchors(*FSFlattened, ProfileAnchors); + + AnchorList FilteredIRAnchorsList; + AnchorList FilteredProfileAnchorList; + getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList, + FilteredProfileAnchorList); + + // Similarly skip the matching if the num of anchors is not enough. + if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching || + FilteredProfileAnchorList.size() < MinCallCountForCGMatching) + return false; + + // Use the diff algorithm to find the LCS between IR and profile. + + // Don't recursively match the callee function to avoid infinite matching, + // callee functions will be handled later since they are processed in top-down + // order. + LocToLocMap MatchedAnchors = + longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, + false /* Match unused functions */); + + Similarity = + static_cast(MatchedAnchors.size()) * 2 / + (FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size()); + + LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName() + << "(IR) and " << ProfFunc << "(profile) is " + << format("%.2f", Similarity) << "\n"); + assert((Similarity >= 0 && Similarity <= 1.0) && + "Similarity value should be in [0, 1]"); + return Similarity * 100 > FuncProfileSimilarityThreshold; +} + +// If FindMatchedProfileOnly is set to true, only use the processed function +// results. This is used for skipping the repeated recursive matching. 
+bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc, + const FunctionId &ProfFunc, + bool FindMatchedProfileOnly) { + auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc}); + if (R != FuncProfileMatchCache.end()) + return R->second; + + if (FindMatchedProfileOnly) + return false; + + bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc); + FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched; + if (Matched) { + FuncToProfileNameMap[&IRFunc] = ProfFunc; + LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName() + << " matches profile:" << ProfFunc << "\n"); + } + + return Matched; +} + void SampleProfileMatcher::runOnModule() { ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, FunctionSamples::ProfileIsCS); - for (auto &F : M) { - if (skipProfileForFunction(F)) + if (SalvageUnusedProfile) + findFunctionsWithoutProfile(); + + // Process the matching in top-down order so that the caller matching result + // can be used for the callee matching. + std::vector TopDownFunctionList; + TopDownFunctionList.reserve(M.size()); + buildTopDownFuncOrder(CG, TopDownFunctionList); + for (auto *F : TopDownFunctionList) { + if (skipProfileForFunction(*F)) continue; - runOnFunction(F); + runOnFunction(*F); } + + // Update the data in SampleLoader. + if (SalvageUnusedProfile) + for (auto &I : FuncToProfileNameMap) { + assert(I.first && "New function is null"); + FunctionId FuncName(I.first->getName()); + FuncNameToProfNameMap->emplace(FuncName, I.second); + // We need to remove the old entry to avoid duplicating the function + // processing. 
+ SymbolMap->erase(FuncName); + SymbolMap->emplace(I.second, I.first); + } + if (SalvageStaleProfile) distributeIRToProfileLocationMap(); diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index ac80a31d8fd4bc..e5aebc4850e6db 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -31,9 +31,9 @@ ; CHECK-EP-PIPELINE-START: Running pass: NoOpModulePass ; CHECK-O: Running pass: SampleProfileLoaderPass ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy -; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index 210a4ef1f76641..0bb26330d000a0 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -44,8 +44,8 @@ ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: SampleProfileLoaderPass -; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis +; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: IPSCCPPass diff --git 
a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming-recursive.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming-recursive.prof new file mode 100644 index 00000000000000..edb1404c1d5174 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming-recursive.prof @@ -0,0 +1,11 @@ +main:42:0 + 1: 0 + 6: 2 + 7: 0 + 5: foo:40 + 1: 20 + 2: bar:20 + 1: 20 + !CFGChecksum: 4294967295 + !CFGChecksum: 281479271677951 + !CFGChecksum: 281582264815352 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof new file mode 100644 index 00000000000000..78ff0f322dd0f2 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-renaming.prof @@ -0,0 +1,57 @@ +main:47:0 + 1: 0 + 2: 2 + 3: 0 + 4: 3 + 7: 2 test_noninline:2 + 8: 2 + 9: 0 + 5: foo:24 + 1: 4 + 2: 3 bar:3 + 4: 3 bar:3 + 5: 1 mismatch:1 + 3: baz:15 + 1: 3 + 2: block_only:12 + 1: 3 + 3: 3 + 5: 3 + 10: 3 + !CFGChecksum: 206551239323 + !CFGChecksum: 281479271677951 + !CFGChecksum: 123456 + 6: baz:14 + 1: 3 + 2: block_only:11 + 1: 3 + 3: 3 + 5: 3 + 10: 2 + !CFGChecksum: 206551239323 + !CFGChecksum: 281479271677951 + 10: cold_func:0 + 1: 0 + 2: 0 block_only:0 + !CFGChecksum: 281479271677951 + !CFGChecksum: 1126003093360596 +test_noninline:22:2 + 1: 2 + 2: foo:20 + 1: 3 + 2: 2 bar:3 + 4: 3 bar:3 + 3: baz:13 + 1: 2 + 2: block_only:11 + 1: 2 + 3: 3 + 5: 3 + 10: 3 + !CFGChecksum: 206551239323 + !CFGChecksum: 281479271677951 + !CFGChecksum: 123456 + !CFGChecksum: 281479271677951 +bar:12:12 + 1: 12 + !CFGChecksum: 4294967295 diff --git a/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll index 5394a00ced86ad..3ca94a45636753 100644 --- 
a/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll +++ b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll @@ -48,18 +48,18 @@ ; } ; } -; CHECK: Run stale profile matching for bar - -; CHECK: Run stale profile matching for foo -; CHECK: Callsite with callee:bar is matched from 1.15 to 1.15 -; CHECK: Callsite with callee:bar is matched from 2 to 2 - ; CHECK: Run stale profile matching for main ; CHECK: Callsite with callee:foo is matched from 4 to 2 ; CHECK: Callsite with callee:bar is matched from 5 to 3 ; CHECK: Callsite with callee:foo is matched from 8 to 4 ; CHECK: Callsite with callee:bar is matched from 9 to 5 +; CHECK: Run stale profile matching for foo +; CHECK: Callsite with callee:bar is matched from 1.15 to 1.15 +; CHECK: Callsite with callee:bar is matched from 2 to 2 + +; CHECK: Run stale profile matching for bar + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll index 4b8cd853301ed1..cdd365b6fb6730 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll @@ -3,17 +3,6 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching-LCS.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching-LCS.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl --salvage-stale-profile-max-callsites=6 2>&1 | FileCheck %s -check-prefix=CHECK-MAX-CALLSITES -; CHECK: Run stale profile 
matching for test_direct_call -; CHECK: Location is matched from 1 to 1 -; CHECK: Location is matched from 2 to 2 -; CHECK: Location is matched from 3 to 3 -; CHECK: Callsite with callee:C is matched from 4 to 2 -; CHECK: Location is rematched backwards from 3 to 1 -; CHECK: Callsite with callee:A is matched from 5 to 4 -; CHECK: Callsite with callee:B is matched from 6 to 5 -; CHECK: Location is matched from 7 to 6 -; CHECK: Callsite with callee:A is matched from 8 to 6 - ; CHECK: Run stale profile matching for test_indirect_call ; CHECK: Location is matched from 1 to 1 ; CHECK: Location is matched from 2 to 2 @@ -28,6 +17,17 @@ ; CHECK: Callsite with callee:unknown.indirect.callee is matched from 9 to 6 ; CHECK: Callsite with callee:C is matched from 10 to 7 +; CHECK: Run stale profile matching for test_direct_call +; CHECK: Location is matched from 1 to 1 +; CHECK: Location is matched from 2 to 2 +; CHECK: Location is matched from 3 to 3 +; CHECK: Callsite with callee:C is matched from 4 to 2 +; CHECK: Location is rematched backwards from 3 to 1 +; CHECK: Callsite with callee:A is matched from 5 to 4 +; CHECK: Callsite with callee:B is matched from 6 to 5 +; CHECK: Location is matched from 7 to 6 +; CHECK: Callsite with callee:A is matched from 8 to 6 + ; CHECK-MAX-CALLSITES: Skip stale profile matching for test_direct_call ; CHECK-MAX-CALLSITES-NOT: Skip stale profile matching for test_indirect_call diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming-recursive.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming-recursive.ll new file mode 100644 index 00000000000000..d9db804b563644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming-recursive.ll @@ -0,0 +1,150 @@ +; REQUIRES: x86_64-linux +; REQUIRES: asserts +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming-recursive.prof --salvage-stale-profile 
--salvage-unused-profile -report-profile-staleness -persist-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s + +; CHECK: Run stale profile matching for main +; CHECK: Function:foo_new matches profile:foo +; CHECK: Run stale profile matching for foo_new +; CHECK: Function:bar_new matches profile:bar +; CHECK: Run stale profile matching for bar_new + +; CHECK: Function processing order: +; CHECK: main +; CHECK: foo_new +; CHECK: bar_new + +; CHECK: 'foo_new' inlined into 'main' to match profiling context with (cost=0, threshold=3000) at callsite main:2:7; +; CHECK: 'bar_new' inlined into 'main' to match profiling context with (cost=-15, threshold=3000) at callsite foo_new:1:3 @ main:2:7; + + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@x = dso_local global i32 0, align 4, !dbg !0 + +; Function Attrs: nounwind uwtable +define dso_local void @bar_new() #0 !dbg !18 { +entry: + call void @llvm.pseudoprobe(i64 8236371237083957767, i64 1, i32 0, i64 -1), !dbg !21 + %0 = load volatile i32, ptr @x, align 4, !dbg !21, !tbaa !22 + %inc = add nsw i32 %0, 1, !dbg !21 + store volatile i32 %inc, ptr @x, align 4, !dbg !21, !tbaa !22 + ret void, !dbg !26 +} + +; Function Attrs: nounwind uwtable +define dso_local void @foo_new() #0 !dbg !27 { +entry: + call void @llvm.pseudoprobe(i64 -837213161392124280, i64 1, i32 0, i64 -1), !dbg !28 + call void @bar_new(), !dbg !29 + ret void, !dbg !31 +} + +; Function Attrs: nounwind uwtable +define dso_local i32 @main() #0 !dbg !32 { +entry: + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !38 + #dbg_value(i32 0, !36, !DIExpression(), !39) + br label %for.cond, !dbg !40 + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, 
%for.body ], !dbg !41 + #dbg_value(i32 %i.0, !36, !DIExpression(), !39) + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !42 + %cmp = icmp slt i32 %i.0, 1000000, !dbg !44 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !45 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !46 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !47 + ret i32 0, !dbg !47 + +for.body: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !48 + call void @foo_new(), !dbg !50 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !52 + %inc = add nsw i32 %i.0, 1, !dbg !52 + #dbg_value(i32 %inc, !36, !DIExpression(), !39) + br label %for.cond, !dbg !53, !llvm.loop !54 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.declare(metadata, metadata, metadata) #2 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3 + +attributes #0 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { mustprogress nocallback nofree nosync nounwind 
speculatable willreturn memory(none) } +attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13} +!llvm.ident = !{!14} +!llvm.pseudo_probe_desc = !{!15, !16, !17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test.c", directory: "/home/", checksumkind: CSK_MD5, checksum: "48867dcc5b42e2991317c585b7545860") +!4 = !{!0} +!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 5} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{i32 8, !"PIC Level", i32 2} +!11 = !{i32 7, !"PIE Level", i32 2} +!12 = !{i32 7, !"uwtable", i32 2} +!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!14 = !{!"clang version 19.0.0"} +!15 = !{i64 8236371237083957767, i64 4294967295, !"bar_new"} +!16 = !{i64 -837213161392124280, i64 281479271677951, !"foo_new"} +!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"} +!18 = distinct !DISubprogram(name: "bar_new", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!19 = !DISubroutineType(types: !20) +!20 = !{null} +!21 = !DILocation(line: 4, column: 4, scope: !18) +!22 = !{!23, !23, i64 0} +!23 = !{!"int", !24, i64 0} +!24 = !{!"omnipotent char", !25, i64 0} +!25 = !{!"Simple C/C++ TBAA"} +!26 = !DILocation(line: 5, column: 1, scope: !18) +!27 = distinct !DISubprogram(name: "foo_new", 
scope: !3, file: !3, line: 7, type: !19, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!28 = !DILocation(line: 8, column: 3, scope: !27) +!29 = !DILocation(line: 8, column: 3, scope: !30) +!30 = !DILexicalBlockFile(scope: !27, file: !3, discriminator: 455082007) +!31 = !DILocation(line: 9, column: 1, scope: !27) +!32 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !33, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !35) +!33 = !DISubroutineType(types: !34) +!34 = !{!6} +!35 = !{!36} +!36 = !DILocalVariable(name: "i", scope: !37, file: !3, line: 12, type: !6) +!37 = distinct !DILexicalBlock(scope: !32, file: !3, line: 12, column: 3) +!38 = !DILocation(line: 12, column: 12, scope: !37) +!39 = !DILocation(line: 0, scope: !37) +!40 = !DILocation(line: 12, column: 8, scope: !37) +!41 = !DILocation(line: 12, scope: !37) +!42 = !DILocation(line: 12, column: 19, scope: !43) +!43 = distinct !DILexicalBlock(scope: !37, file: !3, line: 12, column: 3) +!44 = !DILocation(line: 12, column: 21, scope: !43) +!45 = !DILocation(line: 12, column: 3, scope: !37) +!46 = !DILocation(line: 0, scope: !32) +!47 = !DILocation(line: 15, column: 1, scope: !32) +!48 = !DILocation(line: 13, column: 7, scope: !49) +!49 = distinct !DILexicalBlock(scope: !43, file: !3, line: 12, column: 41) +!50 = !DILocation(line: 13, column: 7, scope: !51) +!51 = !DILexicalBlockFile(scope: !49, file: !3, discriminator: 455082031) +!52 = !DILocation(line: 12, column: 37, scope: !43) +!53 = !DILocation(line: 12, column: 3, scope: !43) +!54 = distinct !{!54, !45, !55, !56} +!55 = !DILocation(line: 14, column: 3, scope: !37) +!56 = !{!"llvm.loop.mustprogress"} diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll new file mode 
100644 index 00000000000000..a549812f46ef6b --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-renaming.ll @@ -0,0 +1,313 @@ +; REQUIRES: x86_64-linux +; REQUIRES: asserts +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -persist-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-renaming.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl --min-call-count-for-cg-matching=10 --min-func-count-for-cg-matching=10 2>&1 | FileCheck %s --check-prefix=TINY-FUNC + +; Verify find new IR functions. +; CHECK: Function new_block_only is not in profile or profile symbol list. +; CHECK: Function new_foo is not in profile or profile symbol list. + +; CHECK: Run stale profile matching for main +; CHECK: The similarity between new_foo(IR) and foo(profile) is 0.86 +; CHECK: Function:new_foo matches profile:foo +; CHECK: Run stale profile matching for cold_func +; CHECK: The checksums for new_block_only(IR) and block_only(Profile) match. +; CHECK: Function:new_block_only matches profile:block_only +; CHECK: Run stale profile matching for test_noninline +; CHECK: Run stale profile matching for baz +; CHECK: Run stale profile matching for bar + +; CHECK: (2/3) of functions' profile are matched and (55/81) of samples are reused by call graph matching. + +; Verify the matched function is updated correctly by checking the inlining. 
+; CHECK: 'new_foo' inlined into 'main' to match profiling context with (cost=110, threshold=3000) at callsite main:2:7.5; +; CHECK: 'new_block_only' inlined into 'main' to match profiling context with (cost=75, threshold=3000) at callsite baz:1:3.2 @ main:3:7.6 +; CHECK: 'new_block_only' inlined into 'main' to match profiling context with (cost=75, threshold=3000) at callsite baz:1:3.2 @ new_foo:2:3.3 @ main:2:7.5; +; CHECK: 'new_foo' inlined into 'test_noninline' to match profiling context with (cost=110, threshold=3000) at callsite test_noninline:1:3.2; + +; CHECK: !"NumCallGraphRecoveredProfiledFunc", i64 2, !"NumCallGraphRecoveredFuncSamples", i64 55 + +; TINY-FUNC-NOT: Function:new_foo matches profile:foo +; TINY-FUNC-NOT: Function:new_block_only matches profile:block_only + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@x = dso_local global i32 0, align 4, !dbg !0 + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @bar(i32 noundef %x) #0 !dbg !22 { +entry: + #dbg_value(i32 %x, !26, !DIExpression(), !27) + call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !28 + %add = add nsw i32 %x, 1, !dbg !29 + ret i32 %add, !dbg !30 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: nounwind uwtable +define dso_local void @new_block_only() #2 !dbg !31 { +entry: + call void @llvm.pseudoprobe(i64 2964250471062803127, i64 1, i32 0, i64 -1), !dbg !34 + %0 = load volatile i32, ptr @x, align 4, !dbg !34, !tbaa !36 + %cmp = icmp eq i32 %0, 9999, !dbg !40 + br i1 %cmp, label %if.then, label %if.else, !dbg !41 + +if.then: ; preds = %entry + call void @llvm.pseudoprobe(i64 2964250471062803127, i64 2, i32 0, i64 -1), !dbg !42 + %1 = load volatile i32, ptr @x, align 4, !dbg !42, !tbaa !36 + 
%add = add nsw i32 %1, 1000, !dbg !42 + store volatile i32 %add, ptr @x, align 4, !dbg !42, !tbaa !36 + br label %if.end10, !dbg !43 + +if.else: ; preds = %entry + call void @llvm.pseudoprobe(i64 2964250471062803127, i64 3, i32 0, i64 -1), !dbg !44 + %2 = load volatile i32, ptr @x, align 4, !dbg !44, !tbaa !36 + %cmp1 = icmp eq i32 %2, 999, !dbg !46 + br i1 %cmp1, label %if.then2, label %if.else4, !dbg !47 + +if.then2: ; preds = %if.else + call void @llvm.pseudoprobe(i64 2964250471062803127, i64 4, i32 0, i64 -1), !dbg !48 + %3 = load volatile i32, ptr @x, align 4, !dbg !48, !tbaa !36 + %add3 = add nsw i32 %3, 100, !dbg !48 + store volatile i32 %add3, ptr @x, align 4, !dbg !48, !tbaa !36 + br label %if.end10, !dbg !49 + +if.else4: ; preds = %if.else + call void @llvm.pseudoprobe(i64 2964250471062803127, i64 5, i32 0, i64 -1), !dbg !50 + %4 = load volatile i32, ptr @x, align 4, !dbg !50, !tbaa !36 + %cmp5 = icmp eq i32 %4, 99, !dbg !52 + br i1 %cmp5, label %if.then6, label %if.else8, !dbg !53 + +if.then6: ; preds = %if.else4 + call void @llvm.pseudoprobe(i64 2964250471062803127, i64 6, i32 0, i64 -1), !dbg !54 + %5 = load volatile i32, ptr @x, align 4, !dbg !54, !tbaa !36 + %add7 = add nsw i32 %5, 10, !dbg !54 + store volatile i32 %add7, ptr @x, align 4, !dbg !54, !tbaa !36 + br label %if.end10, !dbg !55 + +if.else8: ; preds = %if.else4 + call void @llvm.pseudoprobe(i64 2964250471062803127, i64 7, i32 0, i64 -1), !dbg !56 + %6 = load volatile i32, ptr @x, align 4, !dbg !56, !tbaa !36 + %inc = add nsw i32 %6, 1, !dbg !56 + store volatile i32 %inc, ptr @x, align 4, !dbg !56, !tbaa !36 + br label %if.end10 + +if.end10: ; preds = %if.then2, %if.else8, %if.then6, %if.then + call void @llvm.pseudoprobe(i64 2964250471062803127, i64 10, i32 0, i64 -1), !dbg !57 + ret void, !dbg !57 +} + +; Function Attrs: nounwind uwtable +define dso_local void @baz() #2 !dbg !58 { +entry: + call void @llvm.pseudoprobe(i64 7546896869197086323, i64 1, i32 0, i64 -1), !dbg !59 + call void 
@new_block_only(), !dbg !60 + ret void, !dbg !62 +} + +; Function Attrs: nounwind uwtable +define dso_local void @new_foo() #2 !dbg !63 { +entry: + call void @llvm.pseudoprobe(i64 5381804724291869009, i64 1, i32 0, i64 -1), !dbg !64 + %0 = load volatile i32, ptr @x, align 4, !dbg !64, !tbaa !36 + %call = call i32 @bar(i32 noundef %0), !dbg !65 + %1 = load volatile i32, ptr @x, align 4, !dbg !67, !tbaa !36 + %add = add nsw i32 %1, %call, !dbg !67 + store volatile i32 %add, ptr @x, align 4, !dbg !67, !tbaa !36 + call void @baz(), !dbg !68 + %2 = load volatile i32, ptr @x, align 4, !dbg !70, !tbaa !36 + %call1 = call i32 @bar(i32 noundef %2), !dbg !71 + %3 = load volatile i32, ptr @x, align 4, !dbg !73, !tbaa !36 + %add2 = add nsw i32 %3, %call1, !dbg !73 + store volatile i32 %add2, ptr @x, align 4, !dbg !73, !tbaa !36 + ret void, !dbg !74 +} + +; Function Attrs: noinline nounwind uwtable +define dso_local void @test_noninline() #0 !dbg !75 { +entry: + call void @llvm.pseudoprobe(i64 -5610330892148506720, i64 1, i32 0, i64 -1), !dbg !76 + call void @new_foo(), !dbg !77 + ret void, !dbg !79 +} + +; Function Attrs: nounwind uwtable +define dso_local void @cold_func() #2 !dbg !80 { +entry: + call void @llvm.pseudoprobe(i64 2711072140522378707, i64 1, i32 0, i64 -1), !dbg !81 + call void @new_block_only(), !dbg !82 + ret void, !dbg !84 +} + +; Function Attrs: nounwind uwtable +define dso_local i32 @main() #2 !dbg !85 { +entry: + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !91 + #dbg_value(i32 0, !89, !DIExpression(), !92) + br label %for.cond, !dbg !93 + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !94 + #dbg_value(i32 %i.0, !89, !DIExpression(), !92) + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !95 + %cmp = icmp slt i32 %i.0, 1000000, !dbg !97 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !98 + +for.cond.cleanup: ; preds = 
%for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !99 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg !100 + call void @cold_func(), !dbg !101 + ret i32 0, !dbg !103 + +for.body: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !104 + call void @new_foo(), !dbg !106 + call void @baz(), !dbg !108 + call void @test_noninline(), !dbg !110 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !112 + %inc = add nsw i32 %i.0, 1, !dbg !112 + #dbg_value(i32 %inc, !89, !DIExpression(), !92) + br label %for.cond, !dbg !113, !llvm.loop !114 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #4 + +attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #4 = { mustprogress nocallback nofree nosync 
nounwind willreturn memory(inaccessiblemem: readwrite) } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13} +!llvm.ident = !{!14} +!llvm.pseudo_probe_desc = !{!15, !16, !17, !18, !19, !20, !21} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 19.0.0git (https://github.com/llvm/llvm-project.git 2e1509152224d8ffbeac84c489920dcbaeefc2b2)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test_rename.c", directory: "/home/wlei/local/toytest/rename", checksumkind: CSK_MD5, checksum: "b07f600b3cdefd40bd44932bc13c33f5") +!4 = !{!0} +!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 5} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{i32 8, !"PIC Level", i32 2} +!11 = !{i32 7, !"PIE Level", i32 2} +!12 = !{i32 7, !"uwtable", i32 2} +!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!14 = !{!"clang version 19.0.0git (https://github.com/llvm/llvm-project.git 2e1509152224d8ffbeac84c489920dcbaeefc2b2)"} +!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"} +!16 = !{i64 2964250471062803127, i64 206551239323, !"new_block_only"} +!17 = !{i64 7546896869197086323, i64 281479271677951, !"baz"} +!18 = !{i64 5381804724291869009, i64 844429225099263, !"new_foo"} +!19 = !{i64 -5610330892148506720, i64 281479271677951, !"test_noninline"} +!20 = !{i64 2711072140522378707, i64 281479271677951, !"cold_func"} +!21 = !{i64 -2624081020897602054, i64 1126003093360596, !"main"} +!22 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !23, scopeLine: 3, 
flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !25) +!23 = !DISubroutineType(types: !24) +!24 = !{!6, !6} +!25 = !{!26} +!26 = !DILocalVariable(name: "x", arg: 1, scope: !22, file: !3, line: 3, type: !6) +!27 = !DILocation(line: 0, scope: !22) +!28 = !DILocation(line: 4, column: 10, scope: !22) +!29 = !DILocation(line: 4, column: 12, scope: !22) +!30 = !DILocation(line: 4, column: 3, scope: !22) +!31 = distinct !DISubprogram(name: "new_block_only", scope: !3, file: !3, line: 7, type: !32, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!32 = !DISubroutineType(types: !33) +!33 = !{null} +!34 = !DILocation(line: 8, column: 6, scope: !35) +!35 = distinct !DILexicalBlock(scope: !31, file: !3, line: 8, column: 6) +!36 = !{!37, !37, i64 0} +!37 = !{!"int", !38, i64 0} +!38 = !{!"omnipotent char", !39, i64 0} +!39 = !{!"Simple C/C++ TBAA"} +!40 = !DILocation(line: 8, column: 8, scope: !35) +!41 = !DILocation(line: 8, column: 6, scope: !31) +!42 = !DILocation(line: 9, column: 7, scope: !35) +!43 = !DILocation(line: 9, column: 5, scope: !35) +!44 = !DILocation(line: 10, column: 12, scope: !45) +!45 = distinct !DILexicalBlock(scope: !35, file: !3, line: 10, column: 12) +!46 = !DILocation(line: 10, column: 14, scope: !45) +!47 = !DILocation(line: 10, column: 12, scope: !35) +!48 = !DILocation(line: 11, column: 7, scope: !45) +!49 = !DILocation(line: 11, column: 5, scope: !45) +!50 = !DILocation(line: 12, column: 12, scope: !51) +!51 = distinct !DILexicalBlock(scope: !45, file: !3, line: 12, column: 12) +!52 = !DILocation(line: 12, column: 14, scope: !51) +!53 = !DILocation(line: 12, column: 12, scope: !45) +!54 = !DILocation(line: 13, column: 7, scope: !51) +!55 = !DILocation(line: 13, column: 5, scope: !51) +!56 = !DILocation(line: 15, column: 6, scope: !51) +!57 = !DILocation(line: 16, column: 1, scope: !31) +!58 = distinct 
!DISubprogram(name: "baz", scope: !3, file: !3, line: 18, type: !32, scopeLine: 18, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!59 = !DILocation(line: 19, column: 3, scope: !58) +!60 = !DILocation(line: 19, column: 3, scope: !61) +!61 = !DILexicalBlockFile(scope: !58, file: !3, discriminator: 186646551) +!62 = !DILocation(line: 20, column: 1, scope: !58) +!63 = distinct !DISubprogram(name: "new_foo", scope: !3, file: !3, line: 22, type: !32, scopeLine: 22, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!64 = !DILocation(line: 23, column: 12, scope: !63) +!65 = !DILocation(line: 23, column: 8, scope: !66) +!66 = !DILexicalBlockFile(scope: !63, file: !3, discriminator: 186646551) +!67 = !DILocation(line: 23, column: 5, scope: !63) +!68 = !DILocation(line: 24, column: 3, scope: !69) +!69 = !DILexicalBlockFile(scope: !63, file: !3, discriminator: 186646559) +!70 = !DILocation(line: 25, column: 12, scope: !63) +!71 = !DILocation(line: 25, column: 8, scope: !72) +!72 = !DILexicalBlockFile(scope: !63, file: !3, discriminator: 186646567) +!73 = !DILocation(line: 25, column: 5, scope: !63) +!74 = !DILocation(line: 26, column: 1, scope: !63) +!75 = distinct !DISubprogram(name: "test_noninline", scope: !3, file: !3, line: 28, type: !32, scopeLine: 28, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!76 = !DILocation(line: 29, column: 3, scope: !75) +!77 = !DILocation(line: 29, column: 3, scope: !78) +!78 = !DILexicalBlockFile(scope: !75, file: !3, discriminator: 186646551) +!79 = !DILocation(line: 30, column: 1, scope: !75) +!80 = distinct !DISubprogram(name: "cold_func", scope: !3, file: !3, line: 32, type: !32, scopeLine: 32, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!81 = !DILocation(line: 32, column: 20, scope: !80) +!82 = !DILocation(line: 32, column: 20, scope: !83) +!83 = 
!DILexicalBlockFile(scope: !80, file: !3, discriminator: 186646551) +!84 = !DILocation(line: 32, column: 37, scope: !80) +!85 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 34, type: !86, scopeLine: 34, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !88) +!86 = !DISubroutineType(types: !87) +!87 = !{!6} +!88 = !{!89} +!89 = !DILocalVariable(name: "i", scope: !90, file: !3, line: 35, type: !6) +!90 = distinct !DILexicalBlock(scope: !85, file: !3, line: 35, column: 3) +!91 = !DILocation(line: 35, column: 12, scope: !90) +!92 = !DILocation(line: 0, scope: !90) +!93 = !DILocation(line: 35, column: 8, scope: !90) +!94 = !DILocation(line: 35, scope: !90) +!95 = !DILocation(line: 35, column: 19, scope: !96) +!96 = distinct !DILexicalBlock(scope: !90, file: !3, line: 35, column: 3) +!97 = !DILocation(line: 35, column: 21, scope: !96) +!98 = !DILocation(line: 35, column: 3, scope: !90) +!99 = !DILocation(line: 0, scope: !85) +!100 = !DILocation(line: 40, column: 3, scope: !85) +!101 = !DILocation(line: 40, column: 3, scope: !102) +!102 = !DILexicalBlockFile(scope: !85, file: !3, discriminator: 186646615) +!103 = !DILocation(line: 41, column: 1, scope: !85) +!104 = !DILocation(line: 36, column: 7, scope: !105) +!105 = distinct !DILexicalBlock(scope: !96, file: !3, line: 35, column: 41) +!106 = !DILocation(line: 36, column: 7, scope: !107) +!107 = !DILexicalBlockFile(scope: !105, file: !3, discriminator: 186646575) +!108 = !DILocation(line: 37, column: 7, scope: !109) +!109 = !DILexicalBlockFile(scope: !105, file: !3, discriminator: 186646583) +!110 = !DILocation(line: 38, column: 7, scope: !111) +!111 = !DILexicalBlockFile(scope: !105, file: !3, discriminator: 186646591) +!112 = !DILocation(line: 35, column: 37, scope: !96) +!113 = !DILocation(line: 35, column: 3, scope: !96) +!114 = distinct !{!114, !98, !115, !116} +!115 = !DILocation(line: 39, column: 3, scope: !90) +!116 = 
!{!"llvm.loop.mustprogress"}