Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BOLT] Hash-based function matching #95821

Merged
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion bolt/docs/CommandLineArgumentReference.md
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,10 @@
bytes. Default value: 0, i.e. split iff the size is reduced. Note that on some
architectures the size can increase after splitting.

- `--match-profile-with-function-hash`

Turns on matching functions with exact hash

- `--stale-matching-max-func-size=<uint>`

The maximum size of a function to consider for inference.
Expand Down Expand Up @@ -1161,4 +1165,4 @@

- `--print-options`

Print non-default options after command line parsing
shawbyoung marked this conversation as resolved.
Show resolved Hide resolved
Print non-default options after command line parsing
4 changes: 3 additions & 1 deletion bolt/include/bolt/Rewrite/DWARFRewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "bolt/Core/DIEBuilder.h"
#include "bolt/Core/DebugData.h"
#include "bolt/Core/DebugNames.h"
#include "bolt/Core/GDBIndex.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/DWP/DWP.h"
Expand Down Expand Up @@ -131,7 +132,8 @@ class DWARFRewriter {
makeFinalLocListsSection(DWARFVersion Version);

/// Finalize type sections in the main binary.
CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer);
CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer,
GDBIndex &GDBIndexSection);

/// Process and write out CUs that are passsed in.
void finalizeCompileUnits(DIEBuilder &DIEBlder, DIEStreamer &Streamer,
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Core/BinaryEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
// This assumes the second instruction in the macro-op pair will get
// assigned to its own MCRelaxableFragment. Since all JCC instructions
// are relaxable, we should be safe.
Streamer.emitNeverAlignCodeAtEnd(/*Alignment to avoid=*/64, *BC.STI);
}

if (!EmitCodeOnly) {
Expand Down
68 changes: 58 additions & 10 deletions bolt/lib/Profile/YAMLProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ namespace opts {
extern cl::opt<unsigned> Verbosity;
extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> InferStaleProfile;
extern cl::opt<bool> MatchProfileWithFunctionHash;
extern cl::opt<bool> Lite;

static llvm::cl::opt<bool>
IgnoreHash("profile-ignore-hash",
Expand Down Expand Up @@ -363,9 +365,19 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
return Profile.Hash == static_cast<uint64_t>(BF.getHash());
};

// We have to do 2 passes since LTO introduces an ambiguity in function
// names. The first pass assigns profiles that match 100% by name and
// by hash. The second pass allows name ambiguity for LTO private functions.
uint64_t MatchedWithExactName = 0;
uint64_t MatchedWithHash = 0;
uint64_t MatchedWithLTOCommonName = 0;

// Computes hash for binary functions.
if (opts::MatchProfileWithFunctionHash)
for (auto &[_, BF] : BC.getBinaryFunctions())
BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
else if (!opts::IgnoreHash)
for (BinaryFunction *BF : ProfileBFs)
BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);

// This first pass assigns profiles that match 100% by name and by hash.
for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
if (!BF)
continue;
Expand All @@ -374,15 +386,34 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
// the profile.
Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);

// Recompute hash once per function.
if (!opts::IgnoreHash)
Function.computeHash(YamlBP.Header.IsDFSOrder,
YamlBP.Header.HashFunction);

if (profileMatches(YamlBF, Function))
if (profileMatches(YamlBF, Function)) {
matchProfileToFunction(YamlBF, Function);
++MatchedWithExactName;
}
}

// Uses the strict hash of profiled and binary functions to match functions
shawbyoung marked this conversation as resolved.
Show resolved Hide resolved
// that are not matched by name or common name.
if (opts::MatchProfileWithFunctionHash) {
std::unordered_map<size_t, BinaryFunction *> StrictHashToBF;
StrictHashToBF.reserve(BC.getBinaryFunctions().size());

for (auto &[_, BF] : BC.getBinaryFunctions())
StrictHashToBF[BF.getHash()] = &BF;

for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
if (YamlBF.Used)
continue;
auto It = StrictHashToBF.find(YamlBF.Hash);
if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) {
auto *BF = It->second;
shawbyoung marked this conversation as resolved.
Show resolved Hide resolved
matchProfileToFunction(YamlBF, *BF);
++MatchedWithHash;
}
}
}

// This second pass allows name ambiguity for LTO private functions.
for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
if (!LTOCommonNameFunctionMap.contains(CommonName))
continue;
Expand All @@ -396,6 +427,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
for (BinaryFunction *BF : Functions) {
if (!ProfiledFunctions.count(BF) && profileMatches(*YamlBF, *BF)) {
matchProfileToFunction(*YamlBF, *BF);
++MatchedWithLTOCommonName;
return true;
}
}
Expand All @@ -407,8 +439,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
// partially.
if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 &&
!LTOProfiles.front()->Used &&
!ProfiledFunctions.count(*Functions.begin()))
!ProfiledFunctions.count(*Functions.begin())) {
matchProfileToFunction(*LTOProfiles.front(), **Functions.begin());
++MatchedWithLTOCommonName;
}
}

for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
Expand All @@ -420,6 +454,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
<< '\n';

if (opts::Verbosity >= 2) {
outs() << "BOLT-INFO: matched " << MatchedWithExactName
<< " functions with identical names\n";
outs() << "BOLT-INFO: matched " << MatchedWithHash
<< " functions with hash\n";
outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
<< " functions with matching LTO common names\n";
}

shawbyoung marked this conversation as resolved.
Show resolved Hide resolved
// Set for parseFunctionProfile().
NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
NormalizeByCalls = usesEvent("branches");
Expand All @@ -439,6 +482,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {

BC.setNumUnusedProfiledObjects(NumUnused);

if (opts::Lite)
for (BinaryFunction *BF : BC.getAllBinaryFunctions())
if (ProfiledFunctions.find(BF) == ProfiledFunctions.end())
shawbyoung marked this conversation as resolved.
Show resolved Hide resolved
BF->setIgnored();

return Error::success();
}

Expand Down
61 changes: 34 additions & 27 deletions bolt/lib/Rewrite/DWARFRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ namespace bolt {
class DIEStreamer : public DwarfStreamer {
DIEBuilder *DIEBldr;
DWARFRewriter &Rewriter;
GDBIndex &GDBIndexSection;

private:
/// Emit the compilation unit header for \p Unit in the debug_info
Expand Down Expand Up @@ -247,7 +248,7 @@ class DIEStreamer : public DwarfStreamer {
const uint64_t TypeSignature = cast<DWARFTypeUnit>(Unit).getTypeHash();
DIE *TypeDIE = DIEBldr->getTypeDIE(Unit);
const DIEBuilder::DWARFUnitInfo &UI = DIEBldr->getUnitInfoByDwarfUnit(Unit);
Rewriter.addGDBTypeUnitEntry(
GDBIndexSection.addGDBTypeUnitEntry(
{UI.UnitOffset, TypeSignature, TypeDIE->getOffset()});
if (Unit.getVersion() < 5) {
// Switch the section to .debug_types section.
Expand Down Expand Up @@ -279,11 +280,12 @@ class DIEStreamer : public DwarfStreamer {

public:
DIEStreamer(DIEBuilder *DIEBldr, DWARFRewriter &Rewriter,
GDBIndex &GDBIndexSection,
DWARFLinkerBase::OutputFileType OutFileType,
raw_pwrite_stream &OutFile,
DWARFLinkerBase::MessageHandlerTy Warning)
: DwarfStreamer(OutFileType, OutFile, Warning), DIEBldr(DIEBldr),
Rewriter(Rewriter){};
Rewriter(Rewriter), GDBIndexSection(GDBIndexSection) {};

using DwarfStreamer::emitCompileUnitHeader;

Expand Down Expand Up @@ -326,12 +328,11 @@ static cl::opt<bool> KeepARanges(
"keep or generate .debug_aranges section if .gdb_index is written"),
cl::Hidden, cl::cat(BoltCategory));

static cl::opt<bool>
DeterministicDebugInfo("deterministic-debuginfo",
cl::desc("disables parallel execution of tasks that may produce "
"nondeterministic debug info"),
cl::init(true),
cl::cat(BoltCategory));
static cl::opt<bool> DeterministicDebugInfo(
"deterministic-debuginfo",
cl::desc("disables parallel execution of tasks that may produce "
"nondeterministic debug info"),
cl::init(true), cl::cat(BoltCategory));

static cl::opt<std::string> DwarfOutputPath(
"dwarf-output-path",
Expand Down Expand Up @@ -460,10 +461,11 @@ static std::optional<uint64_t> getAsAddress(const DWARFUnit &DU,
static std::unique_ptr<DIEStreamer>
createDIEStreamer(const Triple &TheTriple, raw_pwrite_stream &OutFile,
StringRef Swift5ReflectionSegmentName, DIEBuilder &DIEBldr,
DWARFRewriter &Rewriter) {
DWARFRewriter &Rewriter, GDBIndex &GDBIndexSection) {

std::unique_ptr<DIEStreamer> Streamer = std::make_unique<DIEStreamer>(
&DIEBldr, Rewriter, DWARFLinkerBase::OutputFileType::Object, OutFile,
&DIEBldr, Rewriter, GDBIndexSection,
DWARFLinkerBase::OutputFileType::Object, OutFile,
[&](const Twine &Warning, StringRef Context, const DWARFDie *) {});
Error Err = Streamer->init(TheTriple, Swift5ReflectionSegmentName);
if (Err)
Expand All @@ -484,13 +486,12 @@ emitUnit(DIEBuilder &DIEBldr, DIEStreamer &Streamer, DWARFUnit &Unit) {
return {U.UnitOffset, U.UnitLength, TypeHash};
}

static void emitDWOBuilder(const std::string &DWOName,
DIEBuilder &DWODIEBuilder, DWARFRewriter &Rewriter,
DWARFUnit &SplitCU, DWARFUnit &CU,
DWARFRewriter::DWPState &State,
DebugLocWriter &LocWriter,
DebugStrOffsetsWriter &StrOffstsWriter,
DebugStrWriter &StrWriter) {
static void
emitDWOBuilder(const std::string &DWOName, DIEBuilder &DWODIEBuilder,
DWARFRewriter &Rewriter, DWARFUnit &SplitCU, DWARFUnit &CU,
DWARFRewriter::DWPState &State, DebugLocWriter &LocWriter,
DebugStrOffsetsWriter &StrOffstsWriter,
DebugStrWriter &StrWriter, GDBIndex &GDBIndexSection) {
// Populate debug_info and debug_abbrev for current dwo into StringRef.
DWODIEBuilder.generateAbbrevs();
DWODIEBuilder.finish();
Expand All @@ -500,8 +501,9 @@ static void emitDWOBuilder(const std::string &DWOName,
std::make_shared<raw_svector_ostream>(OutBuffer);
const object::ObjectFile *File = SplitCU.getContext().getDWARFObj().getFile();
auto TheTriple = std::make_unique<Triple>(File->makeTriple());
std::unique_ptr<DIEStreamer> Streamer = createDIEStreamer(
*TheTriple, *ObjOS, "DwoStreamerInitAug2", DWODIEBuilder, Rewriter);
std::unique_ptr<DIEStreamer> Streamer =
createDIEStreamer(*TheTriple, *ObjOS, "DwoStreamerInitAug2",
DWODIEBuilder, Rewriter, GDBIndexSection);
DWARFRewriter::UnitMetaVectorType TUMetaVector;
DWARFRewriter::UnitMeta CUMI = {0, 0, 0};
if (SplitCU.getContext().getMaxDWOVersion() >= 5) {
Expand Down Expand Up @@ -652,6 +654,7 @@ void DWARFRewriter::updateDebugInfo() {

DWARF5AcceleratorTable DebugNamesTable(opts::CreateDebugNames, BC,
*StrWriter);
GDBIndex GDBIndexSection(BC);
DWPState State;
if (opts::WriteDWP)
initDWPState(State);
Expand Down Expand Up @@ -704,7 +707,8 @@ void DWARFRewriter::updateDebugInfo() {
TempRangesSectionWriter->finalizeSection();

emitDWOBuilder(DWOName, DWODIEBuilder, *this, **SplitCU, *Unit, State,
DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter);
DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter,
GDBIndexSection);
}

if (Unit->getVersion() >= 5) {
Expand All @@ -729,9 +733,10 @@ void DWARFRewriter::updateDebugInfo() {
std::make_unique<raw_svector_ostream>(OutBuffer);
const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile();
auto TheTriple = std::make_unique<Triple>(File->makeTriple());
std::unique_ptr<DIEStreamer> Streamer =
createDIEStreamer(*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this);
CUOffsetMap OffsetMap = finalizeTypeSections(DIEBlder, *Streamer);
std::unique_ptr<DIEStreamer> Streamer = createDIEStreamer(
*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this, GDBIndexSection);
CUOffsetMap OffsetMap =
finalizeTypeSections(DIEBlder, *Streamer, GDBIndexSection);

const bool SingleThreadedMode =
opts::NoThreads || opts::DeterministicDebugInfo;
Expand Down Expand Up @@ -761,7 +766,8 @@ void DWARFRewriter::updateDebugInfo() {

finalizeDebugSections(DIEBlder, DebugNamesTable, *Streamer, *ObjOS,
OffsetMap);
updateGdbIndexSection(OffsetMap, CUIndex);
GDBIndexSection.updateGdbIndexSection(OffsetMap, CUIndex,
*ARangesSectionWriter);
}

void DWARFRewriter::updateUnitDebugInfo(
Expand Down Expand Up @@ -1429,7 +1435,8 @@ void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) {
}

CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder,
DIEStreamer &Streamer) {
DIEStreamer &Streamer,
GDBIndex &GDBIndexSection) {
// update TypeUnit DW_AT_stmt_list with new .debug_line information.
auto updateLineTable = [&](const DWARFUnit &Unit) -> void {
DIE *UnitDIE = DIEBlder.getUnitDIEbyUnit(Unit);
Expand All @@ -1449,8 +1456,8 @@ CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder,
std::make_shared<raw_svector_ostream>(OutBuffer);
const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile();
auto TheTriple = std::make_unique<Triple>(File->makeTriple());
std::unique_ptr<DIEStreamer> TypeStreamer =
createDIEStreamer(*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this);
std::unique_ptr<DIEStreamer> TypeStreamer = createDIEStreamer(
*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this, GDBIndexSection);

// generate debug_info and CUMap
CUOffsetMap CUMap;
Expand Down
4 changes: 4 additions & 0 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ extern cl::opt<bool> Hugify;
extern cl::opt<bool> Instrument;
extern cl::opt<JumpTableSupportLevel> JumpTables;
extern cl::opt<bool> KeepNops;
extern cl::opt<bool> MatchProfileWithFunctionHash;
extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
extern cl::opt<bool> TerminalTrap;
Expand Down Expand Up @@ -2982,6 +2983,9 @@ void RewriteInstance::selectFunctionsToProcess() {
if (mustSkip(Function))
return false;

if (opts::MatchProfileWithFunctionHash)
return true;
shawbyoung marked this conversation as resolved.
Show resolved Hide resolved

// If the list is not empty, only process functions from the list.
if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) {
// Regex check (-funcs and -funcs-file options).
Expand Down
5 changes: 5 additions & 0 deletions bolt/lib/Utils/CommandLineOpts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ cl::opt<bool>
cl::desc("instrument code to generate accurate profile data"),
cl::cat(BoltOptCategory));

cl::opt<bool>
MatchProfileWithFunctionHash("match-profile-with-function-hash",
cl::desc("Match profile with function hash"),
cl::Hidden, cl::cat(BoltCategory));

cl::opt<std::string>
OutputFilename("o",
cl::desc("<output file>"),
Expand Down
Loading