From 7832769d329ead264aff238c06dce086b3a74922 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 28 May 2024 19:46:23 -0600 Subject: [PATCH] Revert "[lld] Support thumb PLTs" (#93631) Reverts llvm/llvm-project#86223 windows pre-merge is broken. --- lld/ELF/Arch/ARM.cpp | 176 ++++++++------------------- lld/ELF/Config.h | 1 - lld/ELF/InputFiles.cpp | 12 -- lld/test/ELF/armv8-thumb-plt-reloc.s | 126 ------------------- 4 files changed, 53 insertions(+), 262 deletions(-) delete mode 100644 lld/test/ELF/armv8-thumb-plt-reloc.s diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index 3e0efe540e1bf1..687f9499009d5e 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -231,71 +231,36 @@ static void writePltHeaderLong(uint8_t *buf) { // The default PLT header requires the .got.plt to be within 128 Mb of the // .plt in the positive direction. void ARM::writePltHeader(uint8_t *buf) const { - if (config->armThumbPLTs) { - // The instruction sequence for thumb: - // - // 0: b500 push {lr} - // 2: f8df e008 ldr.w lr, [pc, #0x8] @ 0xe - // 6: 44fe add lr, pc - // 8: f85e ff08 ldr pc, [lr, #8]! - // e: .word .got.plt - .plt - 16 - // - // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from - // `pc` in the add instruction and 8 bytes for the `lr` adjustment. - // - uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16; - assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset"); - write16(buf + 0, 0xb500); - // Split into two halves to support endianness correctly. - write16(buf + 2, 0xf8df); - write16(buf + 4, 0xe008); - write16(buf + 6, 0x44fe); - // Split into two halves to support endianness correctly. - write16(buf + 8, 0xf85e); - write16(buf + 10, 0xff08); - write32(buf + 12, offset); - - memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary - memcpy(buf + 20, trapInstr.data(), 4); - memcpy(buf + 24, trapInstr.data(), 4); - memcpy(buf + 28, trapInstr.data(), 4); - } else { - // Use a similar sequence to that in writePlt(), the difference is the - // calling conventions mean we use lr instead of ip. The PLT entry is - // responsible for saving lr on the stack, the dynamic loader is responsible - // for reloading it. - const uint32_t pltData[] = { - 0xe52de004, // L1: str lr, [sp,#-4]! - 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4) - 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4) - 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4) - }; - - uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4; - if (!llvm::isUInt<27>(offset)) { - // We cannot encode the Offset, use the long form. - writePltHeaderLong(buf); - return; - } - write32(buf + 0, pltData[0]); - write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff)); - write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff)); - write32(buf + 12, pltData[3] | (offset & 0xfff)); - memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary - memcpy(buf + 20, trapInstr.data(), 4); - memcpy(buf + 24, trapInstr.data(), 4); - memcpy(buf + 28, trapInstr.data(), 4); + // Use a similar sequence to that in writePlt(), the difference is the calling + // conventions mean we use lr instead of ip. The PLT entry is responsible for + // saving lr on the stack, the dynamic loader is responsible for reloading + // it. + const uint32_t pltData[] = { + 0xe52de004, // L1: str lr, [sp,#-4]! + 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4) + 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4) + 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4) + }; + + uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4; + if (!llvm::isUInt<27>(offset)) { + // We cannot encode the Offset, use the long form. + writePltHeaderLong(buf); + return; } + write32(buf + 0, pltData[0]); + write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff)); + write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff)); + write32(buf + 12, pltData[3] | (offset & 0xfff)); + memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary + memcpy(buf + 20, trapInstr.data(), 4); + memcpy(buf + 24, trapInstr.data(), 4); + memcpy(buf + 28, trapInstr.data(), 4); } void ARM::addPltHeaderSymbols(InputSection &isec) const { - if (config->armThumbPLTs) { - addSyntheticLocal("$t", STT_NOTYPE, 0, 0, isec); - addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec); - } else { - addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec); - addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec); - } + addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec); + addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec); } // Long form PLT entries that do not have any restrictions on the displacement @@ -314,65 +279,32 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr, // .plt in the positive direction. void ARM::writePlt(uint8_t *buf, const Symbol &sym, uint64_t pltEntryAddr) const { + // The PLT entry is similar to the example given in Appendix A of ELF for + // the Arm Architecture. Instead of using the Group Relocations to find the + // optimal rotation for the 8-bit immediate used in the add instructions we + // hard code the most compact rotations for simplicity. This saves a load + // instruction over the long plt sequences. + const uint32_t pltData[] = { + 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8 + 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8 + 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8 + }; - if (!config->armThumbPLTs) { - uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8; - - // The PLT entry is similar to the example given in Appendix A of ELF for - // the Arm Architecture. Instead of using the Group Relocations to find the - // optimal rotation for the 8-bit immediate used in the add instructions we - // hard code the most compact rotations for simplicity. This saves a load - // instruction over the long plt sequences. - const uint32_t pltData[] = { - 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8 - 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8 - 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8 - }; - if (!llvm::isUInt<27>(offset)) { - // We cannot encode the Offset, use the long form. - writePltLong(buf, sym.getGotPltVA(), pltEntryAddr); - return; - } - write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff)); - write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff)); - write32(buf + 8, pltData[2] | (offset & 0xfff)); - memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary - } else { - uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12; - assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset"); - - // A PLT entry will be: - // - // movw ip, # - // movt ip, # - // add ip, pc - // L1: ldr.w pc, [ip] - // b L1 - // - // where ip = r12 = 0xc - - // movw ip, # - write16(buf + 2, 0x0c00); // use `ip` - relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset); - - // movt ip, # - write16(buf + 6, 0x0c00); // use `ip` - relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset); - - write16(buf + 8, 0x44fc); // add ip, pc - write16(buf + 10, 0xf8dc); // ldr.w pc, [ip] (bottom half) - write16(buf + 12, 0xf000); // ldr.w pc, [ip] (upper half) - write16(buf + 14, 0xe7fc); // Branch to previous instruction + uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8; + if (!llvm::isUInt<27>(offset)) { + // We cannot encode the Offset, use the long form. + writePltLong(buf, sym.getGotPltVA(), pltEntryAddr); + return; } + write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff)); + write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff)); + write32(buf + 8, pltData[2] | (offset & 0xfff)); + memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary } void ARM::addPltSymbols(InputSection &isec, uint64_t off) const { - if (config->armThumbPLTs) { - addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec); - } else { - addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec); - addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec); - } + addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec); + addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec); } bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, @@ -393,8 +325,6 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, case R_ARM_JUMP24: // Source is ARM, all PLT entries are ARM so no interworking required. // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb). - assert(!config->armThumbPLTs && - "If the source is ARM, we should not need Thumb PLTs"); if (s.isFunc() && expr == R_PC && (s.getVA() & 1)) return true; [[fallthrough]]; @@ -405,9 +335,9 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, } case R_ARM_THM_JUMP19: case R_ARM_THM_JUMP24: - // Source is Thumb, when all PLT entries are ARM interworking is required. + // Source is Thumb, all PLT entries are ARM so interworking is required. // Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM). - if ((expr == R_PLT_PC && !config->armThumbPLTs) || (s.isFunc() && (s.getVA() & 1) == 0)) + if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0)) return true; [[fallthrough]]; case R_ARM_THM_CALL: { @@ -617,6 +547,7 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { // STT_FUNC we choose whether to write a BL or BLX depending on the // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is // not of type STT_FUNC then we must preserve the original instruction. + // PLT entries are always ARM state so we know we don't need to interwork. assert(rel.sym); // R_ARM_CALL is always reached via relocate(). bool bit0Thumb = val & 1; bool isBlx = (read32(loc) & 0xfe000000) == 0xfa000000; @@ -675,13 +606,12 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { // PLT entries are always ARM state so we know we need to interwork. assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate(). bool bit0Thumb = val & 1; - bool useThumb = bit0Thumb || config->armThumbPLTs; bool isBlx = (read16(loc + 2) & 0x1000) == 0; // lld 10.0 and before always used bit0Thumb when deciding to write a BLX - // even when type not STT_FUNC. - if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb) + // even when type not STT_FUNC. PLT entries generated by LLD are always ARM. + if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb) stateChangeWarning(loc, rel.type, *rel.sym); - if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) { + if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) { // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As // the BLX instruction may only be two byte aligned. This must be done // before overflow check. diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 883c4a2f84294c..f0dfe7f377de0e 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -217,7 +217,6 @@ struct Config { bool allowMultipleDefinition; bool fatLTOObjects; bool androidPackDynRelocs = false; - bool armThumbPLTs = false; bool armHasBlx = false; bool armHasMovtMovw = false; bool armJ1J2BranchEncoding = false; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index d760dddcf5ec5c..1f496026d3ae20 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -194,18 +194,6 @@ static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base && profile == ARMBuildAttrs::MicroControllerProfile) config->armCMSESupport = true; - - // The thumb PLT entries require Thumb2 which can be used on multiple archs. - // For now, let's limit it to ones where ARM isn't available and we know have - // Thumb2. - std::optional armISA = - attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use); - std::optional thumb = - attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use); - bool noArmISA = !armISA || *armISA == ARMBuildAttrs::Not_Allowed; - bool hasThumb2 = thumb && *thumb >= ARMBuildAttrs::AllowThumb32; - if (noArmISA && hasThumb2) - config->armThumbPLTs = true; } InputFile::InputFile(Kind k, MemoryBufferRef m) diff --git a/lld/test/ELF/armv8-thumb-plt-reloc.s b/lld/test/ELF/armv8-thumb-plt-reloc.s deleted file mode 100644 index 47cd5c1b741ee0..00000000000000 --- a/lld/test/ELF/armv8-thumb-plt-reloc.s +++ /dev/null @@ -1,126 +0,0 @@ -// REQUIRES: arm -// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumb --mcpu=cortex-m33 %p/Inputs/arm-plt-reloc.s -o %t1 -// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumb --mcpu=cortex-m33 %s -o %t2 -// RUN: ld.lld %t1 %t2 -o %t -// RUN: llvm-objdump --no-print-imm-hex -d %t | FileCheck %s -// RUN: ld.lld -shared %t1 %t2 -o %t.so -// RUN: llvm-objdump --no-print-imm-hex -d %t.so | FileCheck --check-prefix=DSO %s -// RUN: llvm-readelf -S -r %t.so | FileCheck -check-prefix=DSOREL %s - -// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumbeb --mcpu=cortex-m33 %p/Inputs/arm-plt-reloc.s -o %t1.be -// RUN: llvm-mc -filetype=obj -arm-add-build-attributes --arch=thumbeb --mcpu=cortex-m33 %s -o %t2.be -// RUN: ld.lld %t1.be %t2.be -o %t.be -// RUN: llvm-objdump --no-print-imm-hex -d %t.be | FileCheck %s -// RUN: ld.lld -shared %t1.be %t2.be -o %t.so.be -// RUN: llvm-objdump --no-print-imm-hex -d %t.so.be | FileCheck --check-prefix=DSO %s -// RUN: llvm-readelf -S -r %t.so.be | FileCheck -check-prefix=DSOREL %s - -// RUN: ld.lld --be8 %t1.be %t2.be -o %t.be -// RUN: llvm-objdump --no-print-imm-hex -d %t.be | FileCheck %s -// RUN: ld.lld --be8 -shared %t1.be %t2.be -o %t.so.be -// RUN: llvm-objdump --no-print-imm-hex -d %t.so.be | FileCheck --check-prefix=DSO %s -// RUN: llvm-readelf -S -r %t.so.be | FileCheck -check-prefix=DSOREL %s - -/// Test PLT entry generation - .text - .align 2 - .globl _start - .type _start,%function -_start: - bl func1 - bl func2 - bl func3 - b.w func1 - b.w func2 - b.w func3 - beq.w func1 - beq.w func2 - beq.w func3 - -/// Executable, expect no PLT -// CHECK: Disassembly of section .text: -// CHECK-EMPTY: -// CHECK-NEXT: : -// CHECK-NEXT: bx lr -// CHECK: : -// CHECK-NEXT: bx lr -// CHECK: : -// CHECK-NEXT: bx lr -// CHECK-NEXT: d4d4 -// CHECK: <_start>: -// CHECK-NEXT: bl {{.*}} -// CHECK-NEXT: bl {{.*}} -// CHECK-NEXT: bl {{.*}} -// CHECK-NEXT: b.w {{.*}} -// CHECK-NEXT: b.w {{.*}} -// CHECK-NEXT: b.w {{.*}} -// CHECK-NEXT: beq.w {{.*}} -// CHECK-NEXT: beq.w {{.*}} -// CHECK-NEXT: beq.w {{.*}} - -// DSO: Disassembly of section .text: -// DSO-EMPTY: -// DSO-NEXT: : -// DSO-NEXT: bx lr -// DSO: : -// DSO-NEXT: bx lr -// DSO: : -// DSO-NEXT: bx lr -// DSO-NEXT: d4d4 -// DSO: <_start>: -/// 0x10260 = PLT func1 -// DSO-NEXT: bl 0x10260 -/// 0x10270 = PLT func2 -// DSO-NEXT: bl 0x10270 -/// 0x10280 = PLT func3 -// DSO-NEXT: bl 0x10280 -/// 0x10260 = PLT func1 -// DSO-NEXT: b.w 0x10260 -/// 0x10270 = PLT func2 -// DSO-NEXT: b.w 0x10270 -/// 0x10280 = PLT func3 -// DSO-NEXT: b.w 0x10280 -/// 0x10260 = PLT func1 -// DSO-NEXT: beq.w 0x10260 -/// 0x10270 = PLT func2 -// DSO-NEXT: beq.w 0x10270 -/// 0x10280 = PLT func3 -// DSO-NEXT: beq.w 0x10280 -// DSO: Disassembly of section .plt: -// DSO-EMPTY: -// DSO-NEXT: 10240 <.plt>: -// DSO-NEXT: push {lr} -// DSO-NEXT: ldr.w lr, [pc, #8] -// DSO-NEXT: add lr, pc -// DSO-NEXT: ldr pc, [lr, #8]! -/// 0x20098 = .got.plt (0x302D8) - pc (0x10238 = .plt + 8) - 8 -// DSO-NEXT: .word 0x00020098 -// DSO-NEXT: .word 0xd4d4d4d4 -// DSO-NEXT: .word 0xd4d4d4d4 -// DSO-NEXT: .word 0xd4d4d4d4 -// DSO-NEXT: .word 0xd4d4d4d4 - -/// 136 + 2 << 16 + 0x1026c = 0x302f4 = got entry 1 -// DSO-NEXT: 10260: f240 0c88 movw r12, #136 -// DSO-NEXT: f2c0 0c02 movt r12, #2 -// DSO-NEXT: 44fc add r12, pc -// DSO-NEXT: f8dc f000 ldr.w pc, [r12] -// DSO-NEXT: e7fc b 0x1026a -/// 124 + 2 << 16 + 0x1027c = 0x302f8 = got entry 2 -// DSO-NEXT: 10270: f240 0c7c movw r12, #124 -// DSO-NEXT: f2c0 0c02 movt r12, #2 -// DSO-NEXT: 44fc add r12, pc -// DSO-NEXT: f8dc f000 ldr.w pc, [r12] -// DSO-NEXT: e7fc b 0x1027a -/// 112 + 2 << 16 + 0x1028c = 0x302fc = got entry 3 -// DSO-NEXT: 10280: f240 0c70 movw r12, #112 -// DSO-NEXT: f2c0 0c02 movt r12, #2 -// DSO-NEXT: 44fc add r12, pc -// DSO-NEXT: f8dc f000 ldr.w pc, [r12] -// DSO-NEXT: e7fc b 0x1028a - -// DSOREL: .got.plt PROGBITS 000302e8 {{.*}} 000018 00 WA 0 0 4 -// DSOREL: Relocation section '.rel.plt' -// DSOREL: 000302f4 {{.*}} R_ARM_JUMP_SLOT {{.*}} func1 -// DSOREL: 000302f8 {{.*}} R_ARM_JUMP_SLOT {{.*}} func2 -// DSOREL: 000302fc {{.*}} R_ARM_JUMP_SLOT {{.*}} func3