From 9f8205d9d8ddccd5c821c2a654805434706a43c2 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Thu, 11 Jul 2024 16:10:30 -0700 Subject: [PATCH] [MemProf] Track and report profiled sizes through cloning (#98382) If requested, via the -memprof-report-hinted-sizes option, track the total profiled size of each MIB through the thin link, then report on the corresponding allocation coldness after all cloning is complete. To save size, the sizes are only emitted when the option is specified, as an optional trailing list on the existing allocation info records (the summary version is bumped to 10 so that readers can parse the new per-module layout), and the sizes are kept separate from the MIBs in the index. --- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 5 +- llvm/include/llvm/IR/ModuleSummaryIndex.h | 16 +++- llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 12 +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 32 ++++++- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 19 +++-- .../IPO/MemProfContextDisambiguation.cpp | 83 +++++++++++++++---- llvm/test/Bitcode/summary_version.ll | 2 +- .../thinlto-func-summary-vtableref-pgo.ll | 2 +- llvm/test/ThinLTO/X86/memprof-basic.ll | 15 ++-- .../MemProfContextDisambiguation/basic.ll | 9 +- 10 files changed, 161 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 5b5e08b5cbc3f3..184bbe32df695c 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -307,7 +307,8 @@ enum GlobalValueSummarySymtabCodes { // [valueid, n x stackidindex] FS_PERMODULE_CALLSITE_INFO = 26, // Summary of per-module allocation memprof metadata. - // [n x (alloc type, nummib, nummib x stackidindex)] + // [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex), + // [nummib x total size]?] FS_PERMODULE_ALLOC_INFO = 27, // Summary of combined index memprof callsite metadata. // [valueid, numstackindices, numver, @@ -316,7 +317,7 @@ enum GlobalValueSummarySymtabCodes { // Summary of combined index allocation memprof metadata.
// [nummib, numver, // nummib x (alloc type, numstackids, numstackids x stackidindex), - // numver x version] + // numver x version, [nummib x total size]?] FS_COMBINED_ALLOC_INFO = 29, FS_STACK_IDS = 30, }; diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 31271ed388e54f..00934cc1ce6f2d 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -403,6 +403,10 @@ struct AllocInfo { // Vector of MIBs in this memprof metadata. std::vector MIBs; + // If requested, keep track of total profiled sizes for each MIB. This will be + // a vector of the same length and order as the MIBs vector, if non-empty. + std::vector TotalSizes; + AllocInfo(std::vector MIBs) : MIBs(std::move(MIBs)) { Versions.push_back(0); } @@ -423,6 +427,16 @@ inline raw_ostream &operator<<(raw_ostream &OS, const AllocInfo &AE) { for (auto &M : AE.MIBs) { OS << "\t\t" << M << "\n"; } + if (!AE.TotalSizes.empty()) { + OS << " TotalSizes per MIB:\n\t\t"; + First = true; + for (uint64_t TS : AE.TotalSizes) { + if (!First) + OS << ", "; + First = false; + OS << TS << "\n"; + } + } return OS; } @@ -1431,7 +1445,7 @@ class ModuleSummaryIndex { // in the way some record are interpreted, like flags for instance. // Note that incrementing this may require changes in both BitcodeReader.cpp // and BitcodeWriter.cpp. 
- static constexpr uint64_t BitcodeSummaryVersion = 9; + static constexpr uint64_t BitcodeSummaryVersion = 10; // Regular LTO module name for ASM writer static constexpr const char *getRegularLTOModuleName() { diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 94ac0484f5ec7f..e9490ccba82157 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -85,6 +85,8 @@ extern cl::opt ScalePartialSampleProfileWorkingSetSize; extern cl::opt MaxNumVTableAnnotations; +extern cl::opt MemProfReportHintedSizes; + // Walk through the operands of a given User via worklist iteration and populate // the set of GlobalValue references encountered. Invoked either on an // Instruction or a GlobalVariable (which walks its initializer). @@ -517,6 +519,7 @@ static void computeFunctionSummary( auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof); if (MemProfMD) { std::vector MIBs; + std::vector TotalSizes; for (auto &MDOp : MemProfMD->operands()) { auto *MIBMD = cast(MDOp); MDNode *StackNode = getMIBStackNode(MIBMD); @@ -536,8 +539,17 @@ static void computeFunctionSummary( } MIBs.push_back( MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices))); + if (MemProfReportHintedSizes) { + auto TotalSize = getMIBTotalSize(MIBMD); + assert(TotalSize); + TotalSizes.push_back(TotalSize); + } } Allocs.push_back(AllocInfo(std::move(MIBs))); + if (MemProfReportHintedSizes) { + assert(Allocs.back().MIBs.size() == TotalSizes.size()); + Allocs.back().TotalSizes = std::move(TotalSizes); + } } else if (!InstCallsite.empty()) { SmallVector StackIdIndices; for (auto StackId : InstCallsite) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index f56b2b32ff98f5..6203c6e5119d18 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -7994,7 +7994,12 @@ Error 
ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { case bitc::FS_PERMODULE_ALLOC_INFO: { unsigned I = 0; std::vector MIBs; - while (I < Record.size()) { + unsigned NumMIBs = 0; + if (Version >= 10) + NumMIBs = Record[I++]; + unsigned MIBsRead = 0; + while ((Version >= 10 && MIBsRead++ < NumMIBs) || + (Version < 10 && I < Record.size())) { assert(Record.size() - I >= 2); AllocationType AllocType = (AllocationType)Record[I++]; unsigned NumStackEntries = Record[I++]; @@ -8007,7 +8012,19 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { } MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList))); } + std::vector TotalSizes; + // We either have no sizes or NumMIBs of them. + assert(I == Record.size() || Record.size() - I == NumMIBs); + if (I < Record.size()) { + MIBsRead = 0; + while (MIBsRead++ < NumMIBs) + TotalSizes.push_back(Record[I++]); + } PendingAllocs.push_back(AllocInfo(std::move(MIBs))); + if (!TotalSizes.empty()) { + assert(PendingAllocs.back().MIBs.size() == TotalSizes.size()); + PendingAllocs.back().TotalSizes = std::move(TotalSizes); + } break; } @@ -8034,8 +8051,21 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { SmallVector Versions; for (unsigned J = 0; J < NumVersions; J++) Versions.push_back(Record[I++]); + std::vector TotalSizes; + // We either have no sizes or NumMIBs of them. 
+ assert(I == Record.size() || Record.size() - I == NumMIBs); + if (I < Record.size()) { + MIBsRead = 0; + while (MIBsRead++ < NumMIBs) { + TotalSizes.push_back(Record[I++]); + } + } PendingAllocs.push_back( AllocInfo(std::move(Versions), std::move(MIBs))); + if (!TotalSizes.empty()) { + assert(PendingAllocs.back().MIBs.size() == TotalSizes.size()); + PendingAllocs.back().TotalSizes = std::move(TotalSizes); + } break; } } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 3378931065f9b3..b3ebe70e8c52fc 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4189,10 +4189,9 @@ static void writeFunctionHeapProfileRecords( // Per module alloc versions should always have a single entry of // value 0. assert(!PerModule || (AI.Versions.size() == 1 && AI.Versions[0] == 0)); - if (!PerModule) { - Record.push_back(AI.MIBs.size()); + Record.push_back(AI.MIBs.size()); + if (!PerModule) Record.push_back(AI.Versions.size()); - } for (auto &MIB : AI.MIBs) { Record.push_back((uint8_t)MIB.AllocType); Record.push_back(MIB.StackIdIndices.size()); @@ -4203,6 +4202,11 @@ static void writeFunctionHeapProfileRecords( for (auto V : AI.Versions) Record.push_back(V); } + assert(AI.TotalSizes.empty() || AI.TotalSizes.size() == AI.MIBs.size()); + if (!AI.TotalSizes.empty()) { + for (auto Size : AI.TotalSizes) + Record.push_back(Size); + } Stream.EmitRecord(PerModule ? 
bitc::FS_PERMODULE_ALLOC_INFO : bitc::FS_COMBINED_ALLOC_INFO, Record, AllocAbbrev); @@ -4432,7 +4436,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib // n x (alloc type, numstackids, numstackids x stackidindex) + // optional: nummib x total size Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv)); @@ -4576,6 +4582,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver // nummib x (alloc type, numstackids, numstackids x stackidindex), // numver x version + // optional: nummib x total size Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv)); @@ -4675,7 +4682,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { writeFunctionHeapProfileRecords( Stream, FS, CallsiteAbbrev, AllocAbbrev, /*PerModule*/ false, - /*GetValueId*/ [&](const ValueInfo &VI) -> unsigned { + /*GetValueId*/ + [&](const ValueInfo &VI) -> unsigned { std::optional ValueID = GetValueId(VI); // This can happen in shared index files for distributed ThinLTO if // the callee function summary is not included. Record 0 which we @@ -4685,7 +4693,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { return 0; return *ValueID; }, - /*GetStackIndex*/ [&](unsigned I) { + /*GetStackIndex*/ + [&](unsigned I) { // Get the corresponding index into the list of StackIds actually // being written for this combined index (which may be a subset in // the case of distributed indexes). 
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index c1e5ab1a2b5618..ef9ddeaaab632c 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -134,6 +134,8 @@ cl::opt SupportsHotColdNew( cl::desc("Linking with hot/cold operator new interfaces")); } // namespace llvm +extern cl::opt MemProfReportHintedSizes; + namespace { /// CRTP base for graphs built from either IR or ThinLTO summary index. /// @@ -172,6 +174,7 @@ class CallsiteContextGraph { void dump() const; void print(raw_ostream &OS) const; + void printTotalSizes(raw_ostream &OS) const; friend raw_ostream &operator<<(raw_ostream &OS, const CallsiteContextGraph &CCG) { @@ -439,7 +442,7 @@ class CallsiteContextGraph { void addStackNodesForMIB(ContextNode *AllocNode, CallStack &StackContext, CallStack &CallsiteContext, - AllocationType AllocType); + AllocationType AllocType, uint64_t TotalSize); /// Matches all callsite metadata (or summary) to the nodes created for /// allocation memprof MIB metadata, synthesizing new nodes to reflect any @@ -611,6 +614,10 @@ class CallsiteContextGraph { /// Map from each context ID to the AllocationType assigned to that context. DenseMap ContextIdToAllocationType; + /// Map from each contextID to the profiled aggregate allocation size, + /// optionally populated when requested (via MemProfReportHintedSizes). + DenseMap ContextIdToTotalSize; + /// Identifies the context node created for a stack id when adding the MIB /// contexts to the graph. 
This is used to locate the context nodes when /// trying to assign the corresponding callsites with those stack ids to these @@ -1004,11 +1011,24 @@ CallsiteContextGraph::addAllocNode( return AllocNode; } +static std::string getAllocTypeString(uint8_t AllocTypes) { + if (!AllocTypes) + return "None"; + std::string Str; + if (AllocTypes & (uint8_t)AllocationType::NotCold) + Str += "NotCold"; + if (AllocTypes & (uint8_t)AllocationType::Cold) + Str += "Cold"; + return Str; +} + template template void CallsiteContextGraph::addStackNodesForMIB( ContextNode *AllocNode, CallStack &StackContext, - CallStack &CallsiteContext, AllocationType AllocType) { + CallStack &CallsiteContext, AllocationType AllocType, + uint64_t TotalSize) { + assert(!MemProfReportHintedSizes || TotalSize > 0); // Treating the hot alloc type as NotCold before the disambiguation for "hot" // is done. if (AllocType == AllocationType::Hot) @@ -1016,6 +1036,11 @@ void CallsiteContextGraph::addStackNodesForMIB( ContextIdToAllocationType[++LastContextId] = AllocType; + if (MemProfReportHintedSizes) { + assert(TotalSize); + ContextIdToTotalSize[LastContextId] = TotalSize; + } + // Update alloc type and context ids for this MIB. AllocNode->AllocTypes |= (uint8_t)AllocType; @@ -1060,6 +1085,10 @@ CallsiteContextGraph::duplicateContextIds( assert(ContextIdToAllocationType.count(OldId)); // The new context has the same allocation type as original. ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; + // For now set this to 0 so we don't duplicate sizes. Not clear how to divvy + // up the size. Assume that if we are able to duplicate context ids that we + // will be able to disambiguate all copies. 
+ ContextIdToTotalSize[LastContextId] = 0; } return NewContextIds; } @@ -1663,7 +1692,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph( CallStack StackContext(StackNode); addStackNodesForMIB( AllocNode, StackContext, CallsiteContext, - getMIBAllocType(MIBMD)); + getMIBAllocType(MIBMD), getMIBTotalSize(MIBMD)); } assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); // Memprof and callsite metadata on memory allocations no longer @@ -1735,12 +1764,20 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph( // stack ids on the allocation call during ModuleSummaryAnalysis. CallStack::const_iterator> EmptyContext; + unsigned I = 0; + assert(!MemProfReportHintedSizes || + AN.TotalSizes.size() == AN.MIBs.size()); // Now add all of the MIBs and their stack nodes. for (auto &MIB : AN.MIBs) { CallStack::const_iterator> StackContext(&MIB); + uint64_t TotalSize = 0; + if (MemProfReportHintedSizes) + TotalSize = AN.TotalSizes[I]; addStackNodesForMIB::const_iterator>( - AllocNode, StackContext, EmptyContext, MIB.AllocType); + AllocNode, StackContext, EmptyContext, MIB.AllocType, + TotalSize); + I++; } assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); // Initialize version 0 on the summary alloc node to the current alloc @@ -2171,17 +2208,6 @@ bool IndexCallsiteContextGraph::calleeMatchesFunc( return true; } -static std::string getAllocTypeString(uint8_t AllocTypes) { - if (!AllocTypes) - return "None"; - std::string Str; - if (AllocTypes & (uint8_t)AllocationType::NotCold) - Str += "NotCold"; - if (AllocTypes & (uint8_t)AllocationType::Cold) - Str += "Cold"; - return Str; -} - template void CallsiteContextGraph::ContextNode::dump() const { @@ -2261,6 +2287,30 @@ void CallsiteContextGraph::print( } } +template +void CallsiteContextGraph::printTotalSizes( + raw_ostream &OS) const { + using GraphType = const CallsiteContextGraph *; + for (const auto Node : nodes(this)) { + if (Node->isRemoved()) + continue; + if (!Node->IsAllocation) + continue; 
+ DenseSet ContextIds = Node->getContextIds(); + std::vector SortedIds(ContextIds.begin(), ContextIds.end()); + std::sort(SortedIds.begin(), SortedIds.end()); + for (auto Id : SortedIds) { + auto SizeI = ContextIdToTotalSize.find(Id); + assert(SizeI != ContextIdToTotalSize.end()); + auto TypeI = ContextIdToAllocationType.find(Id); + assert(TypeI != ContextIdToAllocationType.end()); + OS << getAllocTypeString((uint8_t)TypeI->second) << " context " << Id + << " with total size " << SizeI->second << " is " + << getAllocTypeString(Node->AllocTypes) << " after cloning\n"; + } + } +} + template void CallsiteContextGraph::check() const { using GraphType = const CallsiteContextGraph *; @@ -3797,6 +3847,9 @@ bool CallsiteContextGraph::process() { if (ExportToDot) exportToDot("clonefuncassign"); + if (MemProfReportHintedSizes) + printTotalSizes(errs()); + return Changed; } diff --git a/llvm/test/Bitcode/summary_version.ll b/llvm/test/Bitcode/summary_version.ll index 98feab6fe2f995..26c64f81a773f1 100644 --- a/llvm/test/Bitcode/summary_version.ll +++ b/llvm/test/Bitcode/summary_version.ll @@ -2,7 +2,7 @@ ; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s ; CHECK: +; CHECK: diff --git a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll index 19e228fd5355c7..b3f1e770810d2d 100644 --- a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll +++ b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll @@ -11,7 +11,7 @@ ; RUN: llvm-dis -o - %t.o | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS ; CHECK: +; CHECK-NEXT: ; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction ; that loads vtable pointers. 
diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll index 54e01e5fcdf955..6922dbfd368467 100644 --- a/llvm/test/ThinLTO/X86/memprof-basic.ll +++ b/llvm/test/ThinLTO/X86/memprof-basic.ll @@ -34,7 +34,7 @@ ;; -stats requires asserts ; REQUIRES: asserts -; RUN: opt -thinlto-bc %s >%t.o +; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ ; RUN: -supports-hot-cold-new \ ; RUN: -r=%t.o,main,plx \ @@ -43,9 +43,11 @@ ; RUN: -r=%t.o,_Znam, \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: -memprof-report-hinted-sizes \ ; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \ ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \ -; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS +; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS \ +; RUN: --check-prefix=SIZES ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT ;; We should have cloned bar, baz, and foo, for the cold memory allocation. @@ -64,9 +66,10 @@ ; RUN: -r=%t.o,_Znam, \ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \ +; RUN: -memprof-report-hinted-sizes \ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ ; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \ -; RUN: --check-prefix=STATS +; RUN: --check-prefix=STATS --check-prefix=SIZES ; RUN: cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT ;; We should have cloned bar, baz, and foo, for the cold memory allocation. 
@@ -125,9 +128,9 @@ attributes #0 = { noinline optnone } !0 = !{i64 8632435727821051414} !1 = !{i64 -3421689549917153178} !2 = !{!3, !5} -!3 = !{!4, !"notcold"} +!3 = !{!4, !"notcold", i64 100} !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!5 = !{!6, !"cold"} +!5 = !{!6, !"cold", i64 400} !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} !7 = !{i64 9086428284934609951} !8 = !{i64 -5964873800580613432} @@ -264,6 +267,8 @@ attributes #0 = { noinline optnone } ; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2 ; DUMP: Clone of [[BAR]] +; SIZES: NotCold context 1 with total size 100 is NotCold after cloning +; SIZES: Cold context 2 with total size 400 is Cold after cloning ; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1 ; REMARKS: created clone _Z3barv.memprof.1 diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll index 483582c6ced95a..a82f872d51c7d5 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll @@ -38,8 +38,9 @@ ; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ ; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ ; RUN: -stats -pass-remarks=memprof-context-disambiguation \ +; RUN: -memprof-report-hinted-sizes \ ; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \ -; RUN: --check-prefix=STATS --check-prefix=REMARKS +; RUN: --check-prefix=STATS --check-prefix=REMARKS --check-prefix=SIZES ; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT ;; We should have cloned bar, baz, and foo, for the cold memory allocation. 
@@ -105,9 +106,9 @@ attributes #6 = { builtin } !0 = !{i64 8632435727821051414} !1 = !{i64 -3421689549917153178} !2 = !{!3, !5} -!3 = !{!4, !"notcold"} +!3 = !{!4, !"notcold", i64 100} !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!5 = !{!6, !"cold"} +!5 = !{!6, !"cold", i64 400} !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} !7 = !{i64 9086428284934609951} !8 = !{i64 -5964873800580613432} @@ -248,6 +249,8 @@ attributes #6 = { builtin } ; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv ; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold +; SIZES: NotCold context 1 with total size 100 is NotCold after cloning +; SIZES: Cold context 2 with total size 400 is Cold after cloning ; IR: define {{.*}} @main ;; The first call to foo does not allocate cold memory. It should call the