From 7a7b79aef23e17767053b50412b8027a925ae6c1 Mon Sep 17 00:00:00 2001 From: YongKang Zhu Date: Tue, 6 Feb 2024 15:25:13 -0800 Subject: [PATCH] [CFI][stackprobe] Shrink wrapper select safe prologue insertion block when inline stack probing is enabled Summary: For the switch-case code at the entry of this function: https://github.com/llvm/llvm-project/blob/main/libcxx/include/__algorithm/stable_sort.h#L193C1-L193C5 LLVM produces machine code at one point like: ``` bb.0: successors: %bb.19, %bb.20 liveins: $x0, $x1, $x2, $x3, $x4 %45:gpr64common = COPY $x2 %48:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv; Bcc 3, %bb.19, implicit $nzcv; B %bb.20; bb.20: ; predecessors: %bb.0 successors: %bb.1, %bb.3 %49:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv; Bcc 1, %bb.3, implicit $nzcv; B %bb.1; ``` The `machine-cse` pass later removes the `SUBSXri` instruction in the 2nd block and adds `$nzcv` as its `liveins`, which is legitimate: ``` bb.0: successors: %bb.19, %bb.20 liveins: $x0, $x1, $x2, $x3, $x4 %45:gpr64common = COPY $x2 %48:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv; Bcc 3, %bb.19, implicit $nzcv; B %bb.20; bb.20: ; predecessors: %bb.0 successors: %bb.1, %bb.3 liveins: $nzcv Bcc 1, %bb.3, implicit $nzcv; B %bb.1; ``` The `shrinkwrap` pass later decides that we can put prologue inside `bb.20`, which also seems fine. And then the `prologepilog` pass inserts regular prologue sequence in the 2nd basic block, plus some stack probing code, which trashes `$nzcv` and is caught by an assertion: ``` Assertion `!LiveRegs.contains(Op.getReg()) && "live register clobbered by inserted prologue instructions"' failed. ``` In the 2nd block, the inserted stack probing instruction (with the destination as `x9`) redefines the live-in register `$nzcv`: ``` bb.1: ; predecessors: %bb.0 successors: %bb.2, %bb.4 liveins: $nzcv, $x0, $x1, $x2, $x3, $x4, $lr, $fp, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20 early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp(tied-def 0), -12 :: (store (s64) into %stack.25), (store (s64) into %stack.24) frame-setup STPXi killed $x28, killed $x27, $sp, 2 :: (store (s64) into %stack.23), (store (s64) into %stack.22) frame-setup STPXi killed $x26, killed $x25, $sp, 4 :: (store (s64) into %stack.21), (store (s64) into %stack.20) frame-setup STPXi killed $x24, killed $x23, $sp, 6 :: (store (s64) into %stack.19), (store (s64) into %stack.18) frame-setup STPXi killed $x22, killed $x21, $sp, 8 :: (store (s64) into %stack.17), (store (s64) into %stack.16) frame-setup STPXi killed $x20, killed $x19, $sp, 10 :: (store (s64) into %stack.15), (store (s64) into %stack.14) $x9 = PROBED_STACKALLOC 432, 96, 0, implicit-def $sp, implicit-def $nzcv, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 528 frame-setup CFI_INSTRUCTION offset $w19, -8 frame-setup CFI_INSTRUCTION offset $w20, -16 frame-setup CFI_INSTRUCTION offset $w21, -24 frame-setup CFI_INSTRUCTION offset $w22, -32 frame-setup CFI_INSTRUCTION offset $w23, -40 frame-setup CFI_INSTRUCTION offset $w24, -48 frame-setup CFI_INSTRUCTION offset $w25, -56 frame-setup CFI_INSTRUCTION offset $w26, -64 frame-setup CFI_INSTRUCTION offset $w27, -72 frame-setup CFI_INSTRUCTION offset $w28, -80 frame-setup CFI_INSTRUCTION offset $w30, -88 frame-setup CFI_INSTRUCTION offset $w29, -96 renamable $x21 = COPY $x1 renamable $x20 = COPY $x0 Bcc 1, %bb.4, implicit $nzcv; B %bb.2; ``` One way to fix this is to change `ShrinkWrap` to go one scope out when such situation is detected, so potential stack probing instruction won't trash the lived in flag register. --- llvm/include/llvm/CodeGen/TargetLowering.h | 5 +++++ llvm/lib/CodeGen/ShrinkWrap.cpp | 9 +++++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 5 +++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 3 +++ llvm/lib/Target/X86/X86ISelLowering.cpp | 5 +++++ llvm/lib/Target/X86/X86ISelLowering.h | 3 +++ 6 files changed, 30 insertions(+) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 612433b54f6e44..1e607a1c714afc 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2010,6 +2010,11 @@ class TargetLoweringBase { virtual bool hasInlineStackProbe(const MachineFunction &MF) const { return false; } + virtual bool + isStackProbeInstrDefinedFlagRegLiveIn(const MachineBasicBlock *MBB) const { + return false; + } + virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const { return ""; } diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index ab57d08e527e4d..59f836e98a751c 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -957,6 +957,15 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { if (!ArePointsInteresting()) return Changed; + const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); + while (TLI->hasInlineStackProbe(MF) && + TLI->isStackProbeInstrDefinedFlagRegLiveIn(Save)) { + Save = FindIDom<>(*Save, Save->predecessors(), *MDT); + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + if (!ArePointsInteresting()) + return Changed; + } + LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << printMBBReference(*Save) << ' ' << "\nRestore: " << printMBBReference(*Restore) << '\n'); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index a3b7e3128ac1a4..ea7d955c49efc3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -27349,3 +27349,8 @@ bool AArch64TargetLowering::hasInlineStackProbe( return !Subtarget->isTargetWindows() && MF.getInfo()->hasStackProbing(); } + +bool AArch64TargetLowering::isStackProbeInstrDefinedFlagRegLiveIn( + const MachineBasicBlock *MBB) const { + return MBB->isLiveIn(AArch64::NZCV); +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 436b21fd134632..b22573d1b5fb31 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -997,6 +997,9 @@ class AArch64TargetLowering : public TargetLowering { /// True if stack clash protection is enabled for this functions. bool hasInlineStackProbe(const MachineFunction &MF) const override; + bool isStackProbeInstrDefinedFlagRegLiveIn( + const MachineBasicBlock *MBB) const override; + private: /// Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 18f9871b2bd0c3..89bbd59806f830 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -57882,6 +57882,11 @@ X86TargetLowering::getStackProbeSize(const MachineFunction &MF) const { 4096); } +bool X86TargetLowering::isStackProbeInstrDefinedFlagRegLiveIn( + const MachineBasicBlock *MBB) const { + return MBB->isLiveIn(X86::EFLAGS); +} + Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { if (ML && ML->isInnermost() && ExperimentalPrefInnermostLoopAlignment.getNumOccurrences()) diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index f93c54781846bf..575f9cf63cb5c2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1550,6 +1550,9 @@ namespace llvm { bool hasInlineStackProbe(const MachineFunction &MF) const override; StringRef getStackProbeSymbolName(const MachineFunction &MF) const override; + bool isStackProbeInstrDefinedFlagRegLiveIn( + const MachineBasicBlock *MBB) const override; + unsigned getStackProbeSize(const MachineFunction &MF) const; bool hasVectorBlend() const override { return true; }