Skip to content

Commit

Permalink
[BOLT] Support POSSIBLE_PIC_FIXED_BRANCH
Browse files Browse the repository at this point in the history
Detect and support fixed PIC indirect jumps of the following form:
```
movslq  En(%rip), %r1
leaq  PIC_JUMP_TABLE(%rip), %r2
addq  %r2, %r1
jmpq  *%r1
```

with PIC_JUMP_TABLE that looks like following:

```
  JT:  ----------
   E1:| L1 - JT  |
      |----------|
   E2:| L2 - JT  |
      |----------|
      |          |
         ......
   En:| Ln - JT  |
       ----------
```

The code could be produced by compilers, see
#91648.

Test Plan: updated jump-table-fixed-ref-pic.test

Reviewers: maksfb, ayermolo, dcci, rafaelauler

Reviewed By: rafaelauler

Pull Request: #91667
  • Loading branch information
aaupov committed Jul 19, 2024
1 parent 401d7bc commit 3023b15
Show file tree
Hide file tree
Showing 10 changed files with 153 additions and 59 deletions.
3 changes: 3 additions & 0 deletions bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,9 @@ class BinaryContext {
return nullptr;
}

/// Deregister JumpTable registered at a given \p Address and delete it.
void deleteJumpTable(uint64_t Address);

unsigned getDWARFEncodingSize(unsigned Encoding) {
if (Encoding == dwarf::DW_EH_PE_omit)
return 0;
Expand Down
13 changes: 7 additions & 6 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ enum class IndirectBranchType : char {
POSSIBLE_PIC_JUMP_TABLE, /// Possibly a jump table for PIC.
POSSIBLE_GOTO, /// Possibly a gcc's computed goto.
POSSIBLE_FIXED_BRANCH, /// Possibly an indirect branch to a fixed location.
POSSIBLE_PIC_FIXED_BRANCH, /// Possibly an indirect jump to a fixed entry in a
/// PIC jump table.
};

class MCPlusBuilder {
Expand Down Expand Up @@ -1474,12 +1476,11 @@ class MCPlusBuilder {
/// will be set to the different components of the branch. \p MemLocInstr
/// is the instruction that loads up the indirect function pointer. It may
/// or may not be same as \p Instruction.
virtual IndirectBranchType
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
InstructionIterator End, const unsigned PtrSize,
MCInst *&MemLocInstr, unsigned &BaseRegNum,
unsigned &IndexRegNum, int64_t &DispValue,
const MCExpr *&DispExpr, MCInst *&PCRelBaseOut) const {
virtual IndirectBranchType analyzeIndirectBranch(
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstr, unsigned &BaseRegNum,
unsigned &IndexRegNum, int64_t &DispValue, const MCExpr *&DispExpr,
MCInst *&PCRelBaseOut, MCInst *&FixedEntryLoadInst) const {
llvm_unreachable("not implemented");
return IndirectBranchType::UNKNOWN;
}
Expand Down
10 changes: 10 additions & 0 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2523,6 +2523,16 @@ BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
return nullptr;
}

/// Deregister JumpTable registered at a given \p Address and delete it.
void BinaryContext::deleteJumpTable(uint64_t Address) {
assert(JumpTables.count(Address) && "Must have a jump table at address");
JumpTable *JT = JumpTables.at(Address);
for (BinaryFunction *Parent : JT->Parents)
Parent->JumpTables.erase(Address);
JumpTables.erase(Address);
delete JT;
}

DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
const DWARFAddressRangesVector &InputRanges) const {
DebugAddressRangesVector OutputRanges;
Expand Down
47 changes: 45 additions & 2 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,9 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
// setting the value of the register used by the branch.
MCInst *MemLocInstr;

// The instruction loading the fixed PIC jump table entry value.
MCInst *FixedEntryLoadInstr;

// Address of the table referenced by MemLocInstr. Could be either an
// array of function pointers, or a jump table.
uint64_t ArrayStart = 0;
Expand Down Expand Up @@ -811,7 +814,7 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,

IndirectBranchType BranchType = BC.MIB->analyzeIndirectBranch(
Instruction, Begin, Instructions.end(), PtrSize, MemLocInstr, BaseRegNum,
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr, FixedEntryLoadInstr);

if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
return BranchType;
Expand Down Expand Up @@ -877,6 +880,43 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
if (BaseRegNum == BC.MRI->getProgramCounter())
ArrayStart += getAddress() + Offset + Size;

if (FixedEntryLoadInstr) {
assert(BranchType == IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH &&
"Invalid IndirectBranch type");
MCInst::iterator FixedEntryDispOperand =
BC.MIB->getMemOperandDisp(*FixedEntryLoadInstr);
assert(FixedEntryDispOperand != FixedEntryLoadInstr->end() &&
"Invalid memory instruction");
const MCExpr *FixedEntryDispExpr = FixedEntryDispOperand->getExpr();
const uint64_t EntryAddress = getExprValue(FixedEntryDispExpr);
uint64_t EntrySize = BC.getJumpTableEntrySize(JumpTable::JTT_PIC);
ErrorOr<int64_t> Value =
BC.getSignedValueAtAddress(EntryAddress, EntrySize);
if (!Value)
return IndirectBranchType::UNKNOWN;

BC.outs() << "BOLT-INFO: fixed PIC indirect branch detected in " << *this
<< " at 0x" << Twine::utohexstr(getAddress() + Offset)
<< " referencing data at 0x" << Twine::utohexstr(EntryAddress)
<< " the destination value is 0x"
<< Twine::utohexstr(ArrayStart + *Value) << '\n';

TargetAddress = ArrayStart + *Value;

// Remove spurious JumpTable at EntryAddress caused by PIC reference from
// the load instruction.
BC.deleteJumpTable(EntryAddress);

// Replace FixedEntryDispExpr used in target address calculation with outer
// jump table reference.
JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart);
assert(JT && "Must have a containing jump table for PIC fixed branch");
BC.MIB->replaceMemOperandDisp(*FixedEntryLoadInstr, JT->getFirstLabel(),
EntryAddress - ArrayStart, &*BC.Ctx);

return BranchType;
}

LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addressed memory is 0x"
<< Twine::utohexstr(ArrayStart) << '\n');

Expand Down Expand Up @@ -1126,6 +1166,7 @@ void BinaryFunction::handleIndirectBranch(MCInst &Instruction, uint64_t Size,
}
case IndirectBranchType::POSSIBLE_JUMP_TABLE:
case IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE:
case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH:
if (opts::JumpTables == JTS_NONE)
IsSimple = false;
break;
Expand Down Expand Up @@ -1878,9 +1919,11 @@ bool BinaryFunction::postProcessIndirectBranches(
int64_t DispValue;
const MCExpr *DispExpr;
MCInst *PCRelBaseInstr;
MCInst *FixedEntryLoadInstr;
IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
Instr, BB.begin(), II, PtrSize, MemLocInstr, BaseRegNum,
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr);
IndexRegNum, DispValue, DispExpr, PCRelBaseInstr,
FixedEntryLoadInstr);
if (Type != IndirectBranchType::UNKNOWN || MemLocInstr != nullptr)
continue;

Expand Down
4 changes: 3 additions & 1 deletion bolt/lib/Passes/IndirectCallPromotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,13 +386,15 @@ IndirectCallPromotion::maybeGetHotJumpTableTargets(BinaryBasicBlock &BB,
JumpTableInfoType HotTargets;
MCInst *MemLocInstr;
MCInst *PCRelBaseOut;
MCInst *FixedEntryLoadInstr;
unsigned BaseReg, IndexReg;
int64_t DispValue;
const MCExpr *DispExpr;
MutableArrayRef<MCInst> Insts(&BB.front(), &CallInst);
const IndirectBranchType Type = BC.MIB->analyzeIndirectBranch(
CallInst, Insts.begin(), Insts.end(), BC.AsmInfo->getCodePointerSize(),
MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut);
MemLocInstr, BaseReg, IndexReg, DispValue, DispExpr, PCRelBaseOut,
FixedEntryLoadInstr);

assert(MemLocInstr && "There should always be a load for jump tables");
if (!MemLocInstr)
Expand Down
13 changes: 8 additions & 5 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -852,16 +852,19 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return Uses;
}

IndirectBranchType analyzeIndirectBranch(
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override {
IndirectBranchType
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
InstructionIterator End, const unsigned PtrSize,
MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut,
MCInst *&FixedEntryLoadInstr) const override {
MemLocInstrOut = nullptr;
BaseRegNumOut = AArch64::NoRegister;
IndexRegNumOut = AArch64::NoRegister;
DispValueOut = 0;
DispExprOut = nullptr;
FixedEntryLoadInstr = nullptr;

// An instruction referencing memory used by jump instruction (directly or
// via register). This location could be an array of function pointers
Expand Down
3 changes: 2 additions & 1 deletion bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,14 @@ class RISCVMCPlusBuilder : public MCPlusBuilder {
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstr, unsigned &BaseRegNum,
unsigned &IndexRegNum, int64_t &DispValue, const MCExpr *&DispExpr,
MCInst *&PCRelBaseOut) const override {
MCInst *&PCRelBaseOut, MCInst *&FixedEntryLoadInst) const override {
MemLocInstr = nullptr;
BaseRegNum = 0;
IndexRegNum = 0;
DispValue = 0;
DispExpr = nullptr;
PCRelBaseOut = nullptr;
FixedEntryLoadInst = nullptr;

// Check for the following long tail call sequence:
// 1: auipc xi, %pcrel_hi(sym)
Expand Down
105 changes: 66 additions & 39 deletions bolt/lib/Target/X86/X86MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1866,8 +1866,11 @@ class X86MCPlusBuilder : public MCPlusBuilder {
return true;
}

/// Analyzes PIC-style jump table code template and return identified
/// IndirectBranchType, MemLocInstr (all cases) and FixedEntryLoadInstr
/// (POSSIBLE_PIC_FIXED_BRANCH case).
template <typename Itr>
std::pair<IndirectBranchType, MCInst *>
std::tuple<IndirectBranchType, MCInst *, MCInst *>
analyzePICJumpTable(Itr II, Itr IE, MCPhysReg R1, MCPhysReg R2) const {
// Analyze PIC-style jump table code template:
//
Expand All @@ -1876,6 +1879,13 @@ class X86MCPlusBuilder : public MCPlusBuilder {
// add %r2, %r1
// jmp *%r1
//
// or a fixed indirect jump template:
//
// movslq En(%rip), {%r2|%r1} <- FixedEntryLoadInstr
// lea PIC_JUMP_TABLE(%rip), {%r1|%r2} <- MemLocInstr
// add %r2, %r1
// jmp *%r1
//
// (with any irrelevant instructions in-between)
//
// When we call this helper we've already determined %r1 and %r2, and
Expand Down Expand Up @@ -1916,8 +1926,13 @@ class X86MCPlusBuilder : public MCPlusBuilder {
MO.SegRegNum == X86::NoRegister;
};
LLVM_DEBUG(dbgs() << "Checking for PIC jump table\n");
MCInst *MemLocInstr = nullptr;
const MCInst *MovInstr = nullptr;
MCInst *FirstInstr = nullptr;
MCInst *SecondInstr = nullptr;
enum {
NOMATCH = 0,
MATCH_JUMP_TABLE,
MATCH_FIXED_BRANCH,
} MatchingState = NOMATCH;
while (++II != IE) {
MCInst &Instr = *II;
const MCInstrDesc &InstrDesc = Info->get(Instr.getOpcode());
Expand All @@ -1926,68 +1941,76 @@ class X86MCPlusBuilder : public MCPlusBuilder {
// Ignore instructions that don't affect R1, R2 registers.
continue;
}
if (!MovInstr) {
// Expect to see MOV instruction.
if (!isMOVSX64rm32(Instr)) {
LLVM_DEBUG(dbgs() << "MOV instruction expected.\n");
const bool IsMOVSXInstr = isMOVSX64rm32(Instr);
const bool IsLEAInstr = isLEA64r(Instr);
if (MatchingState == NOMATCH) {
if (IsMOVSXInstr)
MatchingState = MATCH_JUMP_TABLE;
else if (IsLEAInstr)
MatchingState = MATCH_FIXED_BRANCH;
else
break;
}

// Check if it's setting %r1 or %r2. In canonical form it sets %r2.
// If it sets %r1 - rename the registers so we have to only check
// a single form.
unsigned MovDestReg = Instr.getOperand(0).getReg();
if (MovDestReg != R2)
// Check if the first instruction is setting %r1 or %r2. In canonical
// form lea sets %r1 and mov sets %r2. If it's the opposite - rename so
// we have to only check a single form.
unsigned DestReg = Instr.getOperand(0).getReg();
MCPhysReg &ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R2 : R1;
if (DestReg != ExpectReg)
std::swap(R1, R2);
if (MovDestReg != R2) {
LLVM_DEBUG(dbgs() << "MOV instruction expected to set %r2\n");
if (DestReg != ExpectReg)
break;
}

// Verify operands for MOV.
// Verify operands
std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
if (!MO)
break;
if (!isIndexed(*MO, R1))
// POSSIBLE_PIC_JUMP_TABLE
if ((MatchingState == MATCH_JUMP_TABLE && isIndexed(*MO, R1)) ||
(MatchingState == MATCH_FIXED_BRANCH && isRIPRel(*MO)))
FirstInstr = &Instr;
else
break;
MovInstr = &Instr;
} else {
if (!InstrDesc.hasDefOfPhysReg(Instr, R1, *RegInfo))
unsigned ExpectReg = MatchingState == MATCH_JUMP_TABLE ? R1 : R2;
if (!InstrDesc.hasDefOfPhysReg(Instr, ExpectReg, *RegInfo))
continue;
if (!isLEA64r(Instr)) {
LLVM_DEBUG(dbgs() << "LEA instruction expected\n");
if ((MatchingState == MATCH_JUMP_TABLE && !IsLEAInstr) ||
(MatchingState == MATCH_FIXED_BRANCH && !IsMOVSXInstr))
break;
}
if (Instr.getOperand(0).getReg() != R1) {
LLVM_DEBUG(dbgs() << "LEA instruction expected to set %r1\n");
if (Instr.getOperand(0).getReg() != ExpectReg)
break;
}

// Verify operands for LEA.
// Verify operands.
std::optional<X86MemOperand> MO = evaluateX86MemoryOperand(Instr);
if (!MO)
break;
if (!isRIPRel(*MO))
break;
MemLocInstr = &Instr;
SecondInstr = &Instr;
break;
}
}

if (!MemLocInstr)
return std::make_pair(IndirectBranchType::UNKNOWN, nullptr);
if (!SecondInstr)
return std::make_tuple(IndirectBranchType::UNKNOWN, nullptr, nullptr);

if (MatchingState == MATCH_FIXED_BRANCH) {
LLVM_DEBUG(dbgs() << "checking potential fixed indirect branch\n");
return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH,
FirstInstr, SecondInstr);
}
LLVM_DEBUG(dbgs() << "checking potential PIC jump table\n");
return std::make_pair(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE,
MemLocInstr);
return std::make_tuple(IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE,
SecondInstr, nullptr);
}

IndirectBranchType analyzeIndirectBranch(
MCInst &Instruction, InstructionIterator Begin, InstructionIterator End,
const unsigned PtrSize, MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut) const override {
IndirectBranchType
analyzeIndirectBranch(MCInst &Instruction, InstructionIterator Begin,
InstructionIterator End, const unsigned PtrSize,
MCInst *&MemLocInstrOut, unsigned &BaseRegNumOut,
unsigned &IndexRegNumOut, int64_t &DispValueOut,
const MCExpr *&DispExprOut, MCInst *&PCRelBaseOut,
MCInst *&FixedEntryLoadInst) const override {
// Try to find a (base) memory location from where the address for
// the indirect branch is loaded. For X86-64 the memory will be specified
// in the following format:
Expand All @@ -2014,6 +2037,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
IndexRegNumOut = X86::NoRegister;
DispValueOut = 0;
DispExprOut = nullptr;
FixedEntryLoadInst = nullptr;

std::reverse_iterator<InstructionIterator> II(End);
std::reverse_iterator<InstructionIterator> IE(Begin);
Expand Down Expand Up @@ -2046,7 +2070,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
unsigned R2 = PrevInstr.getOperand(2).getReg();
if (R1 == R2)
return IndirectBranchType::UNKNOWN;
std::tie(Type, MemLocInstr) = analyzePICJumpTable(PrevII, IE, R1, R2);
std::tie(Type, MemLocInstr, FixedEntryLoadInst) =
analyzePICJumpTable(PrevII, IE, R1, R2);
break;
}
return IndirectBranchType::UNKNOWN;
Expand Down Expand Up @@ -2090,6 +2115,8 @@ class X86MCPlusBuilder : public MCPlusBuilder {
if (MO->ScaleImm != 1 || MO->BaseRegNum != RIPRegister)
return IndirectBranchType::UNKNOWN;
break;
case IndirectBranchType::POSSIBLE_PIC_FIXED_BRANCH:
break;
default:
if (MO->ScaleImm != PtrSize)
return IndirectBranchType::UNKNOWN;
Expand Down
2 changes: 1 addition & 1 deletion bolt/test/X86/Inputs/jump-table-fixed-ref-pic.s
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ main:
jae .L4
cmpq $0x1, %rdi
jne .L4
mov .Ljt_pic+8(%rip), %rax
movslq .Ljt_pic+8(%rip), %rax
lea .Ljt_pic(%rip), %rdx
add %rdx, %rax
jmpq *%rax
Expand Down
Loading

0 comments on commit 3023b15

Please sign in to comment.