Skip to content

Commit

Permalink
[lld-macho] Fix thunks for non-__text TEXT sections (#99052)
Browse files Browse the repository at this point in the history
This supersedes #87818 and
fixes #52767

When calculating arm64 thunks, we make a few assumptions that may not
hold when considering code sections outside of `__text`:

1. That a section needs thunks only if its size is larger than the
branch range.
2. That any calls into `__stubs` are necessarily forward jumps (that is,
the section with the jump is ordered before `__stubs`).

Such sections exist in the wild, most prominently the
`__lcxx_override` section introduced in
#69498.

This change:
- Ensures that if one section in `__TEXT` gets thunks, all of them do.
- Makes all code sections in `__TEXT` contiguous (and guaranteed to be
placed before `__stubs`)
  • Loading branch information
speednoisemovement authored Jul 23, 2024
1 parent 81e2a57 commit 58f3c5e
Show file tree
Hide file tree
Showing 10 changed files with 112 additions and 22 deletions.
1 change: 1 addition & 0 deletions lld/MachO/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ add_lld_library(lldMachO
OutputSegment.cpp
Relocations.cpp
SectionPriorities.cpp
Sections.cpp
SymbolTable.cpp
Symbols.cpp
SyntheticSections.cpp
Expand Down
14 changes: 12 additions & 2 deletions lld/MachO/ConcatOutputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,20 @@ bool TextOutputSection::needsThunks() const {
uint64_t isecAddr = addr;
for (ConcatInputSection *isec : inputs)
isecAddr = alignToPowerOf2(isecAddr, isec->align) + isec->getSize();
if (isecAddr - addr + in.stubs->getSize() <=
std::min(target->backwardBranchRange, target->forwardBranchRange))
// Other sections besides __text might be small enough to pass this
// test but nevertheless need thunks for calling into other sections.
// An imperfect heuristic to use in this case is that if a section
// we've already processed in this segment needs thunks, so do the
// rest.
bool needsThunks = parent && parent->needsThunks;
if (!needsThunks &&
isecAddr - addr + in.stubs->getSize() <=
std::min(target->backwardBranchRange, target->forwardBranchRange))
return false;
// Yes, this program is large enough to need thunks.
if (parent) {
parent->needsThunks = true;
}
for (ConcatInputSection *isec : inputs) {
for (Reloc &r : isec->relocs) {
if (!target->hasAttr(r.type, RelocAttrBits::BRANCH))
Expand Down
17 changes: 3 additions & 14 deletions lld/MachO/InputSection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "Config.h"
#include "InputFiles.h"
#include "OutputSegment.h"
#include "Sections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
Expand Down Expand Up @@ -366,20 +367,8 @@ uint64_t WordLiteralInputSection::getOffset(uint64_t off) const {
}

bool macho::isCodeSection(const InputSection *isec) {
uint32_t type = sectionType(isec->getFlags());
if (type != S_REGULAR && type != S_COALESCED)
return false;

uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR;
if (attr == S_ATTR_PURE_INSTRUCTIONS)
return true;

if (isec->getSegName() == segment_names::text)
return StringSwitch<bool>(isec->getName())
.Cases(section_names::textCoalNt, section_names::staticInit, true)
.Default(false);

return false;
return sections::isCodeSection(isec->getName(), isec->getSegName(),
isec->getFlags());
}

bool macho::isCfStringSection(const InputSection *isec) {
Expand Down
16 changes: 14 additions & 2 deletions lld/MachO/OutputSegment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "OutputSegment.h"
#include "ConcatOutputSection.h"
#include "InputSection.h"
#include "Sections.h"
#include "Symbols.h"
#include "SyntheticSections.h"

Expand Down Expand Up @@ -89,9 +90,20 @@ static int sectionOrder(OutputSection *osec) {
StringRef segname = osec->parent->name;
// Sections are uniquely identified by their segment + section name.
if (segname == segment_names::text) {
if (osec->name == section_names::header)
return -7;
// `__text` needs to precede the other code sections since it's
// expected to be the largest. This means in effect that it will
// be the section that determines whether we need thunks or not.
if (osec->name == section_names::text)
return -6;
// Ensure all code sections are contiguous with `__text` for thunk
// calculations.
if (sections::isCodeSection(osec->name, segment_names::text, osec->flags) &&
osec->name != section_names::stubHelper) {
return -5;
}
return StringSwitch<int>(osec->name)
.Case(section_names::header, -6)
.Case(section_names::text, -5)
.Case(section_names::stubs, -4)
.Case(section_names::stubHelper, -3)
.Case(section_names::objcStubs, -2)
Expand Down
1 change: 1 addition & 0 deletions lld/MachO/OutputSegment.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class OutputSegment {
uint32_t initProt = 0;
uint32_t flags = 0;
uint8_t index;
bool needsThunks = false;

llvm::TinyPtrVector<Defined *> segmentStartSymbols;
llvm::TinyPtrVector<Defined *> segmentEndSymbols;
Expand Down
36 changes: 36 additions & 0 deletions lld/MachO/Sections.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//===- Sections.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Sections.h"
#include "InputSection.h"
#include "OutputSegment.h"

#include "llvm/ADT/StringSwitch.h"

using namespace llvm;
using namespace llvm::MachO;

namespace lld::macho::sections {
// Decides whether a section, described by its name, segment name, and flags,
// counts as a code section.
//
// A section qualifies only if its type is S_REGULAR or S_COALESCED, and then
// either:
//   * its user attribute bits are exactly S_ATTR_PURE_INSTRUCTIONS, or
//   * it lives in the text segment and is named textCoalNt or staticInit.
bool isCodeSection(StringRef name, StringRef segName, uint32_t flags) {
  // Reject every section type other than regular / coalesced up front.
  const uint32_t kind = sectionType(flags);
  const bool eligibleType = kind == S_REGULAR || kind == S_COALESCED;
  if (!eligibleType)
    return false;

  // The user attribute bits must match pure_instructions exactly; any
  // additional user attribute makes this comparison fail.
  if ((flags & SECTION_ATTRIBUTES_USR) == S_ATTR_PURE_INSTRUCTIONS)
    return true;

  // Outside the text segment there are no name-based exceptions.
  if (segName != segment_names::text)
    return false;

  // Name-based exceptions within the text segment.
  return name == section_names::textCoalNt ||
         name == section_names::staticInit;
}

} // namespace lld::macho::sections
19 changes: 19 additions & 0 deletions lld/MachO/Sections.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//===- Sections.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLD_MACHO_SECTIONS_H
#define LLD_MACHO_SECTIONS_H

#include "llvm/ADT/StringRef.h"

namespace lld::macho::sections {
/// Returns true if a section with the given section name, segment name, and
/// section flags is considered a code section.
///
/// Per the definition in Sections.cpp, the section type encoded in \p flags
/// must be S_REGULAR or S_COALESCED. Such a section is code when its user
/// attribute bits equal S_ATTR_PURE_INSTRUCTIONS, or when \p segName is the
/// text segment and \p name is one of the special-cased names
/// (section_names::textCoalNt, section_names::staticInit).
bool isCodeSection(llvm::StringRef name, llvm::StringRef segName,
uint32_t flags);
} // namespace lld::macho::sections

#endif // #ifndef LLD_MACHO_SECTIONS_H
21 changes: 20 additions & 1 deletion lld/test/MachO/arm64-thunks.s
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
## (3) a second thunk is created when the first one goes out of range
## (4) early calls to a dylib stub use a thunk, and later calls the stub
## directly
## (5) Thunks are created for all sections in the text segment with branches.
## Notes:
## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range

# RUN: rm -rf %t; mkdir %t
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
# RUN: %lld -arch arm64 -dead_strip -lSystem -o %t/thunk %t/input.o
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -o %t/thunk %t/input.o
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s

# CHECK: Disassembly of section __TEXT,__text:
Expand Down Expand Up @@ -164,6 +165,10 @@
# CHECK: adrp x16, 0x[[#%x, F_PAGE]]
# CHECK: add x16, x16, #[[#F_OFFSET]]

# CHECK: Disassembly of section __TEXT,__lcxx_override:
# CHECK: <_z>:
# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0>

# CHECK: Disassembly of section __TEXT,__stubs:

# CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
Expand Down Expand Up @@ -300,3 +305,17 @@ _main:
bl _h
bl ___nan
ret

.section __TEXT,__cstring
.space 0x4000000

.section __TEXT,__lcxx_override,regular,pure_instructions

.globl _z
.no_dead_strip _z
.p2align 2
_z:
bl _a
## Ensure calling into stubs works
bl _extern_sym
ret
8 changes: 5 additions & 3 deletions lld/test/MachO/section-order.s
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,20 @@
# CHECK-12-NEXT: __cstring

# CHECK-21: __text
## `foo` always sorts next to `__text` since it's a code section
## and needs to be adjacent for arm64 thunk calculations
# CHECK-21-NEXT: foo
# CHECK-21-NEXT: __cstring
# CHECK-21-NEXT: bar
# CHECK-21-NEXT: foo

# CHECK-SYNTHETIC-ORDER: __text
# CHECK-SYNTHETIC-ORDER-NEXT: foo
# CHECK-SYNTHETIC-ORDER-NEXT: __stubs
# CHECK-SYNTHETIC-ORDER-NEXT: __stub_helper
# CHECK-SYNTHETIC-ORDER-NEXT: __objc_stubs
# CHECK-SYNTHETIC-ORDER-NEXT: __init_offsets
# CHECK-SYNTHETIC-ORDER-NEXT: __cstring
# CHECK-SYNTHETIC-ORDER-NEXT: bar
# CHECK-SYNTHETIC-ORDER-NEXT: foo
# CHECK-SYNTHETIC-ORDER-NEXT: __unwind_info
# CHECK-SYNTHETIC-ORDER-NEXT: __eh_frame
# CHECK-SYNTHETIC-ORDER-NEXT: __objc_selrefs
Expand All @@ -52,5 +54,5 @@
.asciz ""
.section __TEXT,bar
.space 1
.section __TEXT,foo
.section __TEXT,foo,regular,pure_instructions
.space 1
1 change: 1 addition & 0 deletions llvm/utils/gn/secondary/lld/MachO/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ static_library("MachO") {
"OutputSegment.cpp",
"Relocations.cpp",
"SectionPriorities.cpp",
"Sections.cpp",
"SymbolTable.cpp",
"Symbols.cpp",
"SyntheticSections.cpp",
Expand Down

0 comments on commit 58f3c5e

Please sign in to comment.