Skip to content

Commit

Permalink
[CFI][stackprobe] Shrink wrapper select safe prologue insertion block…
Browse files Browse the repository at this point in the history
… when inline stack probing is enabled

Summary:
For the switch-case code at the entry of this function:
https://github.com/llvm/llvm-project/blob/main/libcxx/include/__algorithm/stable_sort.h#L193C1-L193C5
LLVM produces machine code at one point like:

```
bb.0:
 successors: %bb.19, %bb.20
  liveins: $x0, $x1, $x2, $x3, $x4
  %45:gpr64common = COPY $x2
  %48:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv;
  Bcc 3, %bb.19, implicit $nzcv;
  B %bb.20;

bb.20:
; predecessors: %bb.0
  successors: %bb.1, %bb.3

  %49:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv;
  Bcc 1, %bb.3, implicit $nzcv;
  B %bb.1;
```

The `machine-cse` pass later removes the `SUBSXri` instruction in the 2nd
block and adds `$nzcv` as its `liveins`, which is legitimate:

```
bb.0:
 successors: %bb.19, %bb.20
  liveins: $x0, $x1, $x2, $x3, $x4
  %45:gpr64common = COPY $x2
  %48:gpr64 = SUBSXri %45:gpr64common, 2, 0, implicit-def $nzcv;
  Bcc 3, %bb.19, implicit $nzcv;
  B %bb.20;

bb.20:
; predecessors: %bb.0
  successors: %bb.1, %bb.3
  liveins: $nzcv

  Bcc 1, %bb.3, implicit $nzcv;
  B %bb.1;
```

The `shrinkwrap` pass later decides that we can put prologue inside `bb.20`,
which also seems fine. And then the `prologepilog` pass inserts regular prologue
sequence in the 2nd basic block, plus some stack probing code, which trashes
`$nzcv` and is caught by an assertion:

```
Assertion `!LiveRegs.contains(Op.getReg()) && "live register clobbered by inserted prologue instructions"' failed.
```

In the 2nd block, the inserted stack probing instruction (with the destination
as `x9`) redefines the live-in register `$nzcv`:

```
bb.1:
; predecessors: %bb.0
  successors: %bb.2, %bb.4
  liveins: $nzcv, $x0, $x1, $x2, $x3, $x4, $lr, $fp, $x27, $x28, $x25, $x26, $x23, $x24, $x21, $x22, $x19, $x20
  early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp(tied-def 0), -12 :: (store (s64) into %stack.25), (store (s64) into %stack.24)
  frame-setup STPXi killed $x28, killed $x27, $sp, 2 :: (store (s64) into %stack.23), (store (s64) into %stack.22)
  frame-setup STPXi killed $x26, killed $x25, $sp, 4 :: (store (s64) into %stack.21), (store (s64) into %stack.20)
  frame-setup STPXi killed $x24, killed $x23, $sp, 6 :: (store (s64) into %stack.19), (store (s64) into %stack.18)
  frame-setup STPXi killed $x22, killed $x21, $sp, 8 :: (store (s64) into %stack.17), (store (s64) into %stack.16)
  frame-setup STPXi killed $x20, killed $x19, $sp, 10 :: (store (s64) into %stack.15), (store (s64) into %stack.14)
  $x9 = PROBED_STACKALLOC 432, 96, 0, implicit-def $sp, implicit-def $nzcv, implicit $sp
  frame-setup CFI_INSTRUCTION def_cfa_offset 528
  frame-setup CFI_INSTRUCTION offset $w19, -8
  frame-setup CFI_INSTRUCTION offset $w20, -16
  frame-setup CFI_INSTRUCTION offset $w21, -24
  frame-setup CFI_INSTRUCTION offset $w22, -32
  frame-setup CFI_INSTRUCTION offset $w23, -40
  frame-setup CFI_INSTRUCTION offset $w24, -48
  frame-setup CFI_INSTRUCTION offset $w25, -56
  frame-setup CFI_INSTRUCTION offset $w26, -64
  frame-setup CFI_INSTRUCTION offset $w27, -72
  frame-setup CFI_INSTRUCTION offset $w28, -80
  frame-setup CFI_INSTRUCTION offset $w30, -88
  frame-setup CFI_INSTRUCTION offset $w29, -96
  renamable $x21 = COPY $x1
  renamable $x20 = COPY $x0
  Bcc 1, %bb.4, implicit $nzcv;
  B %bb.2;
```

One way to fix this is to change `ShrinkWrap` to go one scope out when such
situation is detected, so potential stack probing instruction won't trash
the lived in flag register.
  • Loading branch information
YongKang Zhu authored and yozhu committed Feb 13, 2024
1 parent 9dd2c59 commit 7a7b79a
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 0 deletions.
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -2010,6 +2010,11 @@ class TargetLoweringBase {

virtual bool hasInlineStackProbe(const MachineFunction &MF) const { return false; }

virtual bool
isStackProbeInstrDefinedFlagRegLiveIn(const MachineBasicBlock *MBB) const {
return false;
}

virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const {
return "";
}
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/ShrinkWrap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,15 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
if (!ArePointsInteresting())
return Changed;

const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
while (TLI->hasInlineStackProbe(MF) &&
TLI->isStackProbeInstrDefinedFlagRegLiveIn(Save)) {
Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
if (!ArePointsInteresting())
return Changed;
}

LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
<< printMBBReference(*Save) << ' '
<< "\nRestore: " << printMBBReference(*Restore) << '\n');
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27349,3 +27349,8 @@ bool AArch64TargetLowering::hasInlineStackProbe(
return !Subtarget->isTargetWindows() &&
MF.getInfo<AArch64FunctionInfo>()->hasStackProbing();
}

bool AArch64TargetLowering::isStackProbeInstrDefinedFlagRegLiveIn(
const MachineBasicBlock *MBB) const {
return MBB->isLiveIn(AArch64::NZCV);
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,9 @@ class AArch64TargetLowering : public TargetLowering {
/// True if stack clash protection is enabled for this functions.
bool hasInlineStackProbe(const MachineFunction &MF) const override;

bool isStackProbeInstrDefinedFlagRegLiveIn(
const MachineBasicBlock *MBB) const override;

private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57882,6 +57882,11 @@ X86TargetLowering::getStackProbeSize(const MachineFunction &MF) const {
4096);
}

bool X86TargetLowering::isStackProbeInstrDefinedFlagRegLiveIn(
const MachineBasicBlock *MBB) const {
return MBB->isLiveIn(X86::EFLAGS);
}

Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
if (ML && ML->isInnermost() &&
ExperimentalPrefInnermostLoopAlignment.getNumOccurrences())
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1550,6 +1550,9 @@ namespace llvm {
bool hasInlineStackProbe(const MachineFunction &MF) const override;
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;

bool isStackProbeInstrDefinedFlagRegLiveIn(
const MachineBasicBlock *MBB) const override;

unsigned getStackProbeSize(const MachineFunction &MF) const;

bool hasVectorBlend() const override { return true; }
Expand Down

0 comments on commit 7a7b79a

Please sign in to comment.