Skip to content

Commit

Permalink
Merge branch 'main' into users/paschalis-mpeis/bolt-heatmap-docs
Browse files Browse the repository at this point in the history
  • Loading branch information
paschalis-mpeis authored Jul 15, 2024
2 parents 3c7b4df + 71051de commit ae0fd59
Show file tree
Hide file tree
Showing 5,713 changed files with 128,607 additions and 53,499 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
23 changes: 23 additions & 0 deletions .github/new-prs-labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,29 @@ backend:AArch64:
- clang/include/clang/Sema/SemaARM.h
- clang/lib/Sema/SemaARM.cpp

# Glob patterns selecting Hexagon-related files across clang, lld, lldb and llvm.
backend:Hexagon:
- clang/include/clang/Basic/BuiltinsHexagon*.def
- clang/include/clang/Sema/SemaHexagon.h
- clang/lib/Basic/Targets/Hexagon.*
- clang/lib/CodeGen/Targets/Hexagon.cpp
- clang/lib/Driver/ToolChains/Hexagon.*
- clang/lib/Sema/SemaHexagon.cpp
- lld/ELF/Arch/Hexagon.cpp
- lldb/source/Plugins/ABI/Hexagon/**
- lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/**
- llvm/include/llvm/BinaryFormat/ELFRelocs/Hexagon.def
- llvm/include/llvm/IR/IntrinsicsHexagon*
- llvm/include/llvm/Support/Hexagon*
- llvm/lib/Support/Hexagon*
- llvm/lib/Target/Hexagon/**
- llvm/test/CodeGen/Hexagon/**
- llvm/test/CodeGen/*/Hexagon/**
- llvm/test/DebugInfo/*/Hexagon/**
# Trailing /** so files inside the per-pass Hexagon test directories match,
# consistent with the other directory patterns in this list.
- llvm/test/Transforms/*/Hexagon/**
- llvm/test/MC/Disassembler/Hexagon/**
- llvm/test/MC/Hexagon/**
- llvm/test/tools/llvm-objdump/ELF/Hexagon/**

backend:loongarch:
- llvm/include/llvm/IR/IntrinsicsLoongArch.td
- llvm/test/MC/LoongArch/**
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/libcxx-build-and-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ jobs:
cxx: [ 'clang++-19' ]
include:
- config: 'generic-gcc'
cc: 'gcc-13'
cxx: 'g++-13'
cc: 'gcc-14'
cxx: 'g++-14'
steps:
- uses: actions/checkout@v4
- name: ${{ matrix.config }}.${{ matrix.cxx }}
Expand Down Expand Up @@ -101,8 +101,8 @@ jobs:
cxx: [ 'clang++-19' ]
include:
- config: 'generic-gcc-cxx11'
cc: 'gcc-13'
cxx: 'g++-13'
cc: 'gcc-14'
cxx: 'g++-14'
- config: 'generic-cxx23'
cc: 'clang-17'
cxx: 'clang++-17'
Expand Down
4 changes: 4 additions & 0 deletions bolt/docs/CommandLineArgumentReference.md
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,10 @@

Use a modified clustering algorithm geared towards minimizing branches

- `--name-similarity-function-matching-threshold=<uint>`

Match functions using namespace and edit distance.

- `--no-inline`

Disable all inlining (overrides other inlining options)
Expand Down
1 change: 1 addition & 0 deletions bolt/docs/Heatmaps.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ $ llvm-bolt-heatmap -p perf.data <executable>
By default the heatmap will be dumped to *stdout*. You can change it
with the `-o <heatmapfile>` option.


If you prefer to look at the data in a browser (or would like to share
it that way), then you can use an HTML conversion tool. E.g.:

Expand Down
5 changes: 5 additions & 0 deletions bolt/docs/OptimizingLinux.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ $ perf2bolt -p perf.data -o perf.fdata vmlinux

Under a high load, `perf.data` should be several gigabytes in size and you should expect the converted `perf.fdata` not to exceed 100 MB.

Profiles collected from multiple workloads can be merged into a single profile using the `merge-fdata` utility:
```bash
$ merge-fdata perf.1.fdata perf.2.fdata ... perf.<N>.fdata > perf.merged.fdata
```

Two changes are required for the kernel build. The first one is optional but highly recommended. It introduces a BOLT-reserved space into `vmlinux` code section:


Expand Down
19 changes: 15 additions & 4 deletions bolt/include/bolt/Core/DebugData.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,28 +210,39 @@ class DebugRangesSectionWriter {
static bool classof(const DebugRangesSectionWriter *Writer) {
return Writer->getKind() == RangesWriterKind::DebugRangesWriter;
}

/// Append a range to the main buffer.
void appendToRangeBuffer(const DebugBufferVector &CUBuffer);

/// Sets Unit DIE to be updated for CU.
void setDie(DIE *Die) { this->Die = Die; }

/// Returns Unit DIE to be updated for CU.
DIE *getDie() const { return Die; }

/// Writes out range lists for a current CU being processed.
void virtual finalizeSection(){};

/// Needs to be invoked before each \p CU is processed.
void virtual initSection(DWARFUnit &CU){};

/// Initializes Ranges section with empty list.
void initSection();

protected:
std::unique_ptr<DebugBufferVector> RangesBuffer;

std::unique_ptr<raw_svector_ostream> RangesStream;

std::mutex WriterMutex;

/// Current offset in the section (updated as new entries are written).
/// Starts with 16 since the first 16 bytes are reserved for an empty range.
uint32_t SectionOffset{0};

/// Offset of an empty address ranges list.
static constexpr uint64_t EmptyRangesOffset{0};

private:
/// Stores Unit DIE to be updated for CU.
DIE *Die{0};

RangesWriterKind Kind;
};

Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Core/DebugNames.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ class DWARF5AcceleratorTable {
uint64_t CurrentUnitOffset = 0;
const DWARFUnit *CurrentUnit = nullptr;
std::unordered_map<uint32_t, uint32_t> AbbrevTagToIndexMap;
/// Contains a map of TU hashes to Foreign TU indices.
/// This is used to reduce the size of Foreign TU list since there could be
/// multiple TUs with the same hash.
DenseMap<uint64_t, uint32_t> TUHashToIndexMap;

/// Represents a group of entries with identical name (and hence, hash value).
struct HashData {
Expand Down
8 changes: 8 additions & 0 deletions bolt/include/bolt/Core/HashUtilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Profile/ProfileYAMLMapping.h"

namespace llvm {
namespace bolt {
Expand All @@ -35,6 +36,13 @@ std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,

std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB);

std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB);

std::string
hashBlockCalls(const DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>
&IdToYamlFunction,
const yaml::bolt::BinaryBasicBlockProfile &YamlBB);

} // namespace bolt
} // namespace llvm

Expand Down
8 changes: 6 additions & 2 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -2041,9 +2041,13 @@ class MCPlusBuilder {
return InstructionListType();
}

/// Returns a function body that contains only a return instruction. An
/// example usage is a workaround for the '__bolt_fini_trampoline' of
/// Instrumentation.
///
/// \p Ctx is not used by this default implementation.
virtual InstructionListType createDummyReturnFunction(MCContext *Ctx) const {
  // One-element list; createReturn() populates its single instruction.
  InstructionListType Insts(1);
  createReturn(Insts[0]);
  return Insts;
}

/// This method takes an indirect call instruction and splits it up into an
Expand Down
23 changes: 23 additions & 0 deletions bolt/include/bolt/Profile/YAMLProfileReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ class YAMLProfileReader : public ProfileReaderBase {
/// Check if the file contains YAML.
static bool isYAML(StringRef Filename);

using ProfileLookupMap =
DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>;

private:
/// Adjustments for basic samples profiles (without LBR).
bool NormalizeByInsnCount{false};
Expand All @@ -56,6 +59,10 @@ class YAMLProfileReader : public ProfileReaderBase {
/// is attributed.
FunctionSet ProfiledFunctions;

/// Maps profiled function id to function, for function matching with calls as
/// anchors.
ProfileLookupMap IdToYamLBF;

/// For LTO symbol resolution.
/// Map a common LTO prefix to a list of YAML profiles matching the prefix.
StringMap<std::vector<yaml::bolt::BinaryFunctionProfile *>> LTOCommonNameMap;
Expand All @@ -73,13 +80,29 @@ class YAMLProfileReader : public ProfileReaderBase {
bool parseFunctionProfile(BinaryFunction &Function,
const yaml::bolt::BinaryFunctionProfile &YamlBF);

/// Checks if a function profile matches a binary function.
bool profileMatches(const yaml::bolt::BinaryFunctionProfile &Profile,
const BinaryFunction &BF);

/// Infer function profile from stale data (collected on older binaries).
bool inferStaleProfile(BinaryFunction &Function,
const yaml::bolt::BinaryFunctionProfile &YamlBF);

/// Initialize maps for profile matching.
void buildNameMaps(BinaryContext &BC);

/// Matches functions using exact name.
size_t matchWithExactName();

/// Matches functions using LTO common name.
size_t matchWithLTOCommonName();

/// Matches functions using exact hash.
size_t matchWithHash(BinaryContext &BC);

/// Matches functions with similarly named profiled functions.
size_t matchWithNameSimilarity(BinaryContext &BC);

/// Update matched YAML -> BinaryFunction pair.
void matchProfileToFunction(yaml::bolt::BinaryFunctionProfile &YamlBF,
BinaryFunction &BF) {
Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Rewrite/DWARFRewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ class DWARFRewriter {
/// Store Rangelists writer for each DWO CU.
RangeListsDWOWriers RangeListsWritersByCU;

/// Stores ranges writer for each DWO CU.
std::unordered_map<uint64_t, std::unique_ptr<DebugRangesSectionWriter>>
LegacyRangesWritersByCU;

std::mutex LocListDebugInfoPatchesMutex;

/// Dwo id specific its RangesBase.
Expand Down
6 changes: 6 additions & 0 deletions bolt/include/bolt/Utils/NameResolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ class NameResolver {
std::tie(LHS, RHS) = UniqueName.split(Sep);
return (LHS + Suffix + Twine(Sep) + RHS).str();
}

// Strips the trailing suffix that encodes the function's number of names,
// i.e. everything from the first "(*" onward. Names without that marker are
// returned unchanged.
static StringRef dropNumNames(StringRef Name) {
  const size_t SuffixStart = Name.find("(*");
  if (SuffixStart == StringRef::npos)
    return Name;
  return Name.substr(0, SuffixStart);
}
};

} // namespace bolt
Expand Down
8 changes: 0 additions & 8 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2403,16 +2403,8 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
Streamer->emitLabel(SplitStartLabel);
emitFunctionBody(*Streamer, BF, FF, /*EmitCodeOnly=*/true);
Streamer->emitLabel(SplitEndLabel);
// To avoid calling MCObjectStreamer::flushPendingLabels() which is
// private
Streamer->emitBytes(StringRef(""));
Streamer->switchSection(Section);
}

// To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
// MCStreamer::Finish(), which does more than we want
Streamer->emitBytes(StringRef(""));

MCAssembler &Assembler =
static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
Assembler.layout();
Expand Down
3 changes: 3 additions & 0 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2538,6 +2538,7 @@ struct CFISnapshot {
case MCCFIInstruction::OpWindowSave:
case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpLLVMDefAspaceCfa:
case MCCFIInstruction::OpLabel:
llvm_unreachable("unsupported CFI opcode");
break;
case MCCFIInstruction::OpRememberState:
Expand Down Expand Up @@ -2675,6 +2676,7 @@ struct CFISnapshotDiff : public CFISnapshot {
case MCCFIInstruction::OpWindowSave:
case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpLLVMDefAspaceCfa:
case MCCFIInstruction::OpLabel:
llvm_unreachable("unsupported CFI opcode");
return false;
case MCCFIInstruction::OpRememberState:
Expand Down Expand Up @@ -2823,6 +2825,7 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
case MCCFIInstruction::OpWindowSave:
case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpLLVMDefAspaceCfa:
case MCCFIInstruction::OpLabel:
llvm_unreachable("unsupported CFI opcode");
break;
case MCCFIInstruction::OpGnuArgsSize:
Expand Down
20 changes: 13 additions & 7 deletions bolt/lib/Core/DebugData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,14 @@ DebugRangesSectionWriter::DebugRangesSectionWriter() {
RangesBuffer = std::make_unique<DebugBufferVector>();
RangesStream = std::make_unique<raw_svector_ostream>(*RangesBuffer);

// Add an empty range as the first entry;
SectionOffset +=
writeAddressRanges(*RangesStream.get(), DebugAddressRangesVector{});
Kind = RangesWriterKind::DebugRangesWriter;
}

void DebugRangesSectionWriter::initSection() {
// Adds an empty range to the buffer.
writeAddressRanges(*RangesStream.get(), DebugAddressRangesVector{});
}

uint64_t DebugRangesSectionWriter::addRanges(
DebugAddressRangesVector &&Ranges,
std::map<DebugAddressRangesVector, uint64_t> &CachedRanges) {
Expand All @@ -166,15 +168,20 @@ uint64_t DebugRangesSectionWriter::addRanges(DebugAddressRangesVector &Ranges) {
// Reading the SectionOffset and updating it should be atomic to guarantee
// unique and correct offsets in patches.
std::lock_guard<std::mutex> Lock(WriterMutex);
const uint32_t EntryOffset = SectionOffset;
SectionOffset += writeAddressRanges(*RangesStream.get(), Ranges);
const uint32_t EntryOffset = RangesBuffer->size();
writeAddressRanges(*RangesStream.get(), Ranges);

return EntryOffset;
}

/// Returns the current size of the ranges buffer, which is the offset at which
/// the next entry written through addRanges() would start. The read is done
/// while holding WriterMutex, the same lock addRanges() takes before writing.
uint64_t DebugRangesSectionWriter::getSectionOffset() {
  std::lock_guard<std::mutex> Lock(WriterMutex);
  return RangesBuffer->size();
}

void DebugRangesSectionWriter::appendToRangeBuffer(
const DebugBufferVector &CUBuffer) {
*RangesStream << CUBuffer;
}

DebugAddrWriter *DebugRangeListsSectionWriter::AddrWriter = nullptr;
Expand Down Expand Up @@ -321,7 +328,6 @@ void DebugRangeListsSectionWriter::finalizeSection() {
*RangesStream << *Header;
*RangesStream << *CUArrayBuffer;
*RangesStream << *CUBodyBuffer;
SectionOffset = RangesBuffer->size();
}

void DebugRangeListsSectionWriter::initSection(DWARFUnit &Unit) {
Expand Down
15 changes: 12 additions & 3 deletions bolt/lib/Core/DebugNames.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,11 @@ void DWARF5AcceleratorTable::addUnit(DWARFUnit &Unit,
auto Iter = CUOffsetsToPatch.insert({*DWOID, CUList.size()});
if (Iter.second)
CUList.push_back(BADCUOFFSET);
ForeignTUList.push_back(cast<DWARFTypeUnit>(&Unit)->getTypeHash());
const uint64_t TUHash = cast<DWARFTypeUnit>(&Unit)->getTypeHash();
if (!TUHashToIndexMap.count(TUHash)) {
TUHashToIndexMap.insert({TUHash, ForeignTUList.size()});
ForeignTUList.push_back(TUHash);
}
} else {
LocalTUList.push_back(CurrentUnitOffset);
}
Expand Down Expand Up @@ -231,8 +235,13 @@ DWARF5AcceleratorTable::addAccelTableEntry(
IsTU = Unit.isTypeUnit();
DieTag = Die.getTag();
if (IsTU) {
if (DWOID)
return ForeignTUList.size() - 1;
if (DWOID) {
const uint64_t TUHash = cast<DWARFTypeUnit>(&Unit)->getTypeHash();
auto Iter = TUHashToIndexMap.find(TUHash);
assert(Iter != TUHashToIndexMap.end() &&
"Could not find TU hash in map");
return Iter->second;
}
return LocalTUList.size() - 1;
}
return CUList.size() - 1;
Expand Down
Loading

0 comments on commit ae0fd59

Please sign in to comment.