From 25cf3286427d2e3d3871edaaaa66fb937499052c Mon Sep 17 00:00:00 2001 From: weiguozhi <57237827+weiguozhi@users.noreply.github.com> Date: Tue, 6 Aug 2024 16:18:20 -0700 Subject: [PATCH] Spill/restore FP/BP around instructions in which they are clobbered (#81048) This patch fixes https://github.com/llvm/llvm-project/issues/17204. If a base pointer is used in a function, and it is clobbered by an instruction (typically an inline asm), the current register allocator can't handle this situation, so BP becomes garbage after those instructions. In theory the same problem can also occur to FP. We can spill and reload FP/BP registers around those instructions. But normal spill/reload instructions also use FP/BP, so we can't spill them into normal spill slots; instead we spill them to the top of the stack using the SP register. --- .../llvm/CodeGen/TargetFrameLowering.h | 4 + llvm/lib/CodeGen/CFIInstrInserter.cpp | 33 +- llvm/lib/CodeGen/PrologEpilogInserter.cpp | 5 + llvm/lib/Target/X86/X86FrameLowering.cpp | 320 ++++++++++++++++++ llvm/lib/Target/X86/X86FrameLowering.h | 25 ++ .../X86/apx/push2-pop2-vector-register.ll | 4 + llvm/test/CodeGen/X86/apx/push2-pop2.ll | 24 ++ llvm/test/CodeGen/X86/apx/pushp-popp.ll | 4 + llvm/test/CodeGen/X86/avx512-intel-ocl.ll | 8 + llvm/test/CodeGen/X86/clobber_base_ptr.ll | 118 +++++++ llvm/test/CodeGen/X86/clobber_frame_ptr.ll | 159 +++++++++ .../test/CodeGen/X86/clobber_frame_ptr_x32.ll | 53 +++ llvm/test/CodeGen/X86/i386-baseptr.ll | 4 + .../X86/inline-asm-function-call-pic.ll | 4 + llvm/test/CodeGen/X86/x86-32-intrcc.ll | 4 + llvm/test/CodeGen/X86/x86-64-baseptr.ll | 12 + .../CodeGen/X86/x86-64-flags-intrinsics.ll | 8 + 17 files changed, 783 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/X86/clobber_base_ptr.ll create mode 100644 llvm/test/CodeGen/X86/clobber_frame_ptr.ll create mode 100644 llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index d8c9d0a432ad8f..3df9e56db38a43 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -475,6 +475,10 @@ class TargetFrameLowering { /// debug info. virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const; + /// If frame pointer or base pointer is clobbered by an instruction, we should + /// spill/restore it around that instruction. + virtual void spillFPBP(MachineFunction &MF) const {} + /// This method is called at the end of prolog/epilog code insertion, so /// targets can emit remarks based on the final frame layout. virtual void emitRemarks(const MachineFunction &MF, diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index 06de92515c0444..f5bedc7b8ecdfc 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" using namespace llvm; @@ -184,6 +185,10 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { unsigned NumRegs = TRI.getNumSupportedRegs(*MF); BitVector CSRSaved(NumRegs), CSRRestored(NumRegs); +#ifndef NDEBUG + int RememberState = 0; +#endif + // Determine cfa offset and register set by the block. for (MachineInstr &MI : *MBBInfo.MBB) { if (MI.isCFIInstruction()) { @@ -228,17 +233,25 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { case MCCFIInstruction::OpRememberState: // TODO: Add support for handling cfi_remember_state. #ifndef NDEBUG - report_fatal_error( - "Support for cfi_remember_state not implemented! Value of CFA " - "may be incorrect!\n"); + // Currently we need cfi_remember_state and cfi_restore_state to be in + // the same BB, so it will not impact outgoing CFA. 
+ ++RememberState; + if (RememberState != 1) + MF->getContext().reportError( + SMLoc(), + "Support for cfi_remember_state not implemented! Value of CFA " + "may be incorrect!\n"); #endif break; case MCCFIInstruction::OpRestoreState: // TODO: Add support for handling cfi_restore_state. #ifndef NDEBUG - report_fatal_error( - "Support for cfi_restore_state not implemented! Value of CFA may " - "be incorrect!\n"); + --RememberState; + if (RememberState != 0) + MF->getContext().reportError( + SMLoc(), + "Support for cfi_restore_state not implemented! Value of CFA may " + "be incorrect!\n"); #endif break; // Other CFI directives do not affect CFA value. @@ -264,6 +277,14 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { } } +#ifndef NDEBUG + if (RememberState != 0) + MF->getContext().reportError( + SMLoc(), + "Support for cfi_remember_state not implemented! Value of CFA may be " + "incorrect!\n"); +#endif + MBBInfo.Processed = true; // Update outgoing CFA info. diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index f4490873cfdcdb..ee03eaa8ae527c 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -228,6 +228,11 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF); ORE = &getAnalysis().getORE(); + // Spill frame pointer and/or base pointer registers if they are clobbered. + // It is placed before call frame instruction elimination so it will not mess + // with stack arguments. + TFI->spillFPBP(MF); + // Calculate the MaxCallFrameSize value for the function's frame // information. Also eliminates call frame pseudo instructions. 
calculateCallFrameInfo(MF); diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index bdc9a0d29670a1..77dac1197f85e9 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -4227,3 +4227,323 @@ void X86FrameLowering::restoreWinEHStackPointersInParent( /*RestoreSP=*/IsSEH); } } + +// Compute the alignment gap between current SP after spilling FP/BP and the +// next properly aligned stack offset. +static int computeFPBPAlignmentGap(MachineFunction &MF, + const TargetRegisterClass *RC, + unsigned NumSpilledRegs) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs; + Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign(); + unsigned AlignedSize = alignTo(AllocSize, StackAlign); + return AlignedSize - AllocSize; +} + +void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF, + MachineBasicBlock::iterator BeforeMI, + Register FP, Register BP, + int SPAdjust) const { + assert(FP.isValid() || BP.isValid()); + + MachineBasicBlock *MBB = BeforeMI->getParent(); + DebugLoc DL = BeforeMI->getDebugLoc(); + + // Spill FP. + if (FP.isValid()) { + BuildMI(*MBB, BeforeMI, DL, + TII.get(getPUSHOpcode(MF.getSubtarget()))) + .addReg(FP); + } + + // Spill BP. + if (BP.isValid()) { + BuildMI(*MBB, BeforeMI, DL, + TII.get(getPUSHOpcode(MF.getSubtarget()))) + .addReg(BP); + } + + // Make sure SP is aligned. + if (SPAdjust) + emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false); + + // Emit unwinding information. + if (FP.isValid() && needsDwarfCFI(MF)) { + // Emit .cfi_remember_state to remember old frame. 
+ unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr)); + BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // Setup new CFA value with DW_CFA_def_cfa_expression: + // DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus + SmallString<64> CfaExpr; + uint8_t buffer[16]; + int Offset = SPAdjust; + if (BP.isValid()) + Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP)); + // If BeforeMI is a frame setup instruction, we need to adjust the position + // and offset of the new cfi instruction. + if (TII.isFrameSetup(*BeforeMI)) { + Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign()); + BeforeMI = std::next(BeforeMI); + } + Register StackPtr = TRI->getStackRegister(); + if (STI.isTarget64BitILP32()) + StackPtr = Register(getX86SubSuperRegister(StackPtr, 64)); + unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true); + CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr)); + CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer)); + CfaExpr.push_back(dwarf::DW_OP_deref); + CfaExpr.push_back(dwarf::DW_OP_consts); + CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer)); + CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus); + + SmallString<64> DefCfaExpr; + DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); + DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer)); + DefCfaExpr.append(CfaExpr.str()); + BuildCFI(*MBB, BeforeMI, DL, + MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()), + MachineInstr::FrameSetup); + } +} + +void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF, + MachineBasicBlock::iterator AfterMI, + Register FP, Register BP, + int SPAdjust) const { + assert(FP.isValid() || BP.isValid()); + + // Adjust SP so it points to spilled FP or BP. 
+ MachineBasicBlock *MBB = AfterMI->getParent(); + MachineBasicBlock::iterator Pos = std::next(AfterMI); + DebugLoc DL = AfterMI->getDebugLoc(); + if (SPAdjust) + emitSPUpdate(*MBB, Pos, DL, SPAdjust, false); + + // Restore BP. + if (BP.isValid()) { + BuildMI(*MBB, Pos, DL, + TII.get(getPOPOpcode(MF.getSubtarget())), BP); + } + + // Restore FP. + if (FP.isValid()) { + BuildMI(*MBB, Pos, DL, + TII.get(getPOPOpcode(MF.getSubtarget())), FP); + + // Emit unwinding information. + if (needsDwarfCFI(MF)) { + // Restore original frame with .cfi_restore_state. + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr)); + BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + } +} + +void X86FrameLowering::saveAndRestoreFPBPUsingSP( + MachineFunction &MF, MachineBasicBlock::iterator BeforeMI, + MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const { + assert(SpillFP || SpillBP); + + Register FP, BP; + const TargetRegisterClass *RC; + unsigned NumRegs = 0; + + if (SpillFP) { + FP = TRI->getFrameRegister(MF); + if (STI.isTarget64BitILP32()) + FP = Register(getX86SubSuperRegister(FP, 64)); + RC = TRI->getMinimalPhysRegClass(FP); + ++NumRegs; + } + if (SpillBP) { + BP = TRI->getBaseRegister(); + if (STI.isTarget64BitILP32()) + BP = Register(getX86SubSuperRegister(BP, 64)); + RC = TRI->getMinimalPhysRegClass(BP); + ++NumRegs; + } + int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs); + + spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust); + restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust); +} + +bool X86FrameLowering::skipSpillFPBP( + MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const { + if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) { + // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form + // SaveRbx = COPY RBX + // SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx + // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx. 
+ // We should skip this instruction sequence. + int FI; + unsigned Reg; + while (!(MI->getOpcode() == TargetOpcode::COPY && + MI->getOperand(1).getReg() == X86::RBX) && + !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX)) + ++MI; + return true; + } + return false; +} + +static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP, + const TargetRegisterInfo *TRI, bool &AccessFP, + bool &AccessBP) { + AccessFP = AccessBP = false; + if (FP) { + if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 || + MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1) + AccessFP = true; + } + if (BP) { + if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 || + MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1) + AccessBP = true; + } + return AccessFP || AccessBP; +} + +// Invoke instruction has been lowered to normal function call. We try to figure +// out if MI comes from Invoke. +// Do we have any better method? +static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) { + if (!MI.isCall()) + return false; + if (InsideEHLabels) + return true; + + const MachineBasicBlock *MBB = MI.getParent(); + if (!MBB->hasEHPadSuccessor()) + return false; + + // Check if there is another call instruction from MI to the end of MBB. + MachineBasicBlock::const_iterator MBBI = MI, ME = MBB->end(); + for (++MBBI; MBBI != ME; ++MBBI) + if (MBBI->isCall()) + return false; + return true; +} + +/// If a function uses base pointer and the base pointer is clobbered by inline +/// asm, RA doesn't detect this case, and after the inline asm, the base pointer +/// contains garbage value. +/// For example if a 32b x86 function uses base pointer esi, and esi is +/// clobbered by following inline asm +/// asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory"); +/// We need to save esi before the asm and restore it after the asm. 
+/// +/// The problem can also occur to frame pointer if there is a function call, and +/// the callee uses a different calling convention and clobbers the fp. +/// +/// Because normal frame objects (spill slots) are accessed through fp/bp +/// register, so we can't spill fp/bp to normal spill slots. +/// +/// FIXME: There are 2 possible enhancements: +/// 1. In many cases there are different physical registers not clobbered by +/// inline asm, we can use one of them as base pointer. Or use a virtual +/// register as base pointer and let RA allocate a physical register to it. +/// 2. If there is no other instructions access stack with fp/bp from the +/// inline asm to the epilog, and no cfi requirement for a correct fp, we can +/// skip the save and restore operations. +void X86FrameLowering::spillFPBP(MachineFunction &MF) const { + Register FP, BP; + const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); + if (TFI.hasFP(MF)) + FP = TRI->getFrameRegister(MF); + if (TRI->hasBasePointer(MF)) + BP = TRI->getBaseRegister(); + if (!FP && !BP) + return; + + for (MachineBasicBlock &MBB : MF) { + bool InsideEHLabels = false; + auto MI = MBB.rbegin(), ME = MBB.rend(); + auto TermMI = MBB.getFirstTerminator(); + if (TermMI != MBB.begin()) + MI = *(std::prev(TermMI)); + + while (MI != ME) { + // Skip frame setup/destroy instructions. + // Skip Invoke (call inside try block) instructions. + // Skip instructions handled by target. + if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) || + MI->getFlag(MachineInstr::MIFlag::FrameDestroy) || + isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) { + ++MI; + continue; + } + + if (MI->getOpcode() == TargetOpcode::EH_LABEL) { + InsideEHLabels = !InsideEHLabels; + ++MI; + continue; + } + + bool AccessFP, AccessBP; + // Check if fp or bp is used in MI. 
+ if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) { + ++MI; + continue; + } + + // Look for the range [DefMI, KillMI] in which fp or bp is defined and + // used. + bool FPLive = false, BPLive = false; + bool SpillFP = false, SpillBP = false; + auto DefMI = MI, KillMI = MI; + do { + SpillFP |= AccessFP; + SpillBP |= AccessBP; + + // Maintain FPLive and BPLive. + if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1) + FPLive = false; + if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1) + FPLive = true; + if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1) + BPLive = false; + if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1) + BPLive = true; + + DefMI = MI++; + } while ((MI != ME) && + (FPLive || BPLive || + isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP))); + + // Don't need to save/restore if FP is accessed through llvm.frameaddress. + if (FPLive && !SpillBP) + continue; + + // If the bp is clobbered by a call, we should save and restore outside of + // the frame setup instructions. + if (KillMI->isCall() && DefMI != ME) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + auto FrameSetup = std::next(DefMI); + // Look for frame setup instruction toward the start of the BB. + // If we reach another call instruction, it means no frame setup + // instruction for the current call instruction. + while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) && + !FrameSetup->isCall()) + ++FrameSetup; + // If a frame setup instruction is found, we need to find out the + // corresponding frame destroy instruction. + if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup)) { + while (!TII.isFrameInstr(*KillMI)) + --KillMI; + DefMI = FrameSetup; + MI = DefMI; + ++MI; + } + } + + // Call target function to spill and restore FP and BP registers. 
+ saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP); + } + } +} diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index 2dc9ecc6109d78..e21f6ab3d16d5f 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -103,6 +103,8 @@ class X86FrameLowering : public TargetFrameLowering { MutableArrayRef CSI, const TargetRegisterInfo *TRI) const override; + void spillFPBP(MachineFunction &MF) const override; + bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; @@ -267,6 +269,29 @@ class X86FrameLowering : public TargetFrameLowering { void emitCatchRetReturnValue(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr *CatchRet) const; + + /// Issue instructions to allocate stack space and spill frame pointer and/or + /// base pointer to stack using stack pointer register. + void spillFPBPUsingSP(MachineFunction &MF, + const MachineBasicBlock::iterator BeforeMI, Register FP, + Register BP, int SPAdjust) const; + + /// Issue instructions to restore frame pointer and/or base pointer from stack + /// using stack pointer register, and free stack space. + void restoreFPBPUsingSP(MachineFunction &MF, + const MachineBasicBlock::iterator AfterMI, + Register FP, Register BP, int SPAdjust) const; + + void saveAndRestoreFPBPUsingSP(MachineFunction &MF, + MachineBasicBlock::iterator BeforeMI, + MachineBasicBlock::iterator AfterMI, + bool SpillFP, bool SpillBP) const; + + // If MI uses fp/bp, but target can handle it, and doesn't want to be spilled + // again, this function should return true, and update MI so we will not check + // any instructions from related sequence. 
+ bool skipSpillFPBP(MachineFunction &MF, + MachineBasicBlock::reverse_iterator &MI) const; }; } // End llvm namespace diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll index aa5c54d30e3bc4..f20c4c1ae27867 100644 --- a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll +++ b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll @@ -43,8 +43,12 @@ define void @widget(float %arg) nounwind { ; FRAME-NEXT: xorl %r8d, %r8d ; FRAME-NEXT: callq *%rsi ; FRAME-NEXT: movss %xmm6, 0 +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload ; FRAME-NEXT: addq $48, %rsp ; FRAME-NEXT: pop2 %r15, %rsi diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2.ll b/llvm/test/CodeGen/X86/apx/push2-pop2.ll index 25139f1da8272c..6bd9f525090ee2 100644 --- a/llvm/test/CodeGen/X86/apx/push2-pop2.ll +++ b/llvm/test/CodeGen/X86/apx/push2-pop2.ll @@ -24,8 +24,12 @@ define void @csr1() nounwind { ; FRAME: # %bb.0: # %entry ; FRAME-NEXT: pushq %rbp ; FRAME-NEXT: movq %rsp, %rbp +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: popq %rbp ; FRAME-NEXT: retq entry: @@ -59,8 +63,12 @@ define void @csr2() nounwind { ; FRAME-NEXT: pushq %rbp ; FRAME-NEXT: movq %rsp, %rbp ; FRAME-NEXT: pushq %r15 +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: popq %r15 ; FRAME-NEXT: popq %rbp ; FRAME-NEXT: retq @@ -95,8 +103,12 @@ define void @csr3() nounwind { ; FRAME-NEXT: pushq %rbp ; FRAME-NEXT: movq %rsp, %rbp ; FRAME-NEXT: push2 %r14, %r15 +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: 
popq %rbp ; FRAME-NEXT: pop2 %r15, %r14 ; FRAME-NEXT: popq %rbp ; FRAME-NEXT: retq @@ -136,8 +148,12 @@ define void @csr4() nounwind { ; FRAME-NEXT: movq %rsp, %rbp ; FRAME-NEXT: push2 %r14, %r15 ; FRAME-NEXT: pushq %r13 +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: popq %r13 ; FRAME-NEXT: pop2 %r15, %r14 ; FRAME-NEXT: popq %rbp @@ -178,8 +194,12 @@ define void @csr5() nounwind { ; FRAME-NEXT: movq %rsp, %rbp ; FRAME-NEXT: push2 %r14, %r15 ; FRAME-NEXT: push2 %r12, %r13 +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: pop2 %r13, %r12 ; FRAME-NEXT: pop2 %r15, %r14 ; FRAME-NEXT: popq %rbp @@ -225,8 +245,12 @@ define void @csr6() nounwind { ; FRAME-NEXT: push2 %r14, %r15 ; FRAME-NEXT: push2 %r12, %r13 ; FRAME-NEXT: pushq %rbx +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: popq %rbx ; FRAME-NEXT: pop2 %r13, %r12 ; FRAME-NEXT: pop2 %r15, %r14 diff --git a/llvm/test/CodeGen/X86/apx/pushp-popp.ll b/llvm/test/CodeGen/X86/apx/pushp-popp.ll index ad4306fccce669..625e70b07198e8 100644 --- a/llvm/test/CodeGen/X86/apx/pushp-popp.ll +++ b/llvm/test/CodeGen/X86/apx/pushp-popp.ll @@ -18,8 +18,12 @@ define void @csr2() nounwind { ; FRAME-NEXT: pushp %rbp ; FRAME-NEXT: movq %rsp, %rbp ; FRAME-NEXT: pushp %r15 +; FRAME-NEXT: pushp %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popp %rbp ; FRAME-NEXT: popp %r15 ; FRAME-NEXT: popp %rbp ; FRAME-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll index 25d182afd66e71..78870278eeace9 100644 --- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll +++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll @@ -69,8 
+69,12 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { ; X64-NEXT: andq $-64, %rsp ; X64-NEXT: subq $128, %rsp ; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; X64-NEXT: pushq %rbp +; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rdi ; X64-NEXT: callq _func_float16_ptr +; X64-NEXT: addq $8, %rsp +; X64-NEXT: popq %rbp ; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0 ; X64-NEXT: leaq -16(%rbp), %rsp ; X64-NEXT: popq %r12 @@ -149,8 +153,12 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { ; X64-NEXT: subq $128, %rsp ; X64-NEXT: vmovaps %zmm1, %zmm16 ; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; X64-NEXT: pushq %rbp +; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rdi ; X64-NEXT: callq _func_float16_ptr +; X64-NEXT: addq $8, %rsp +; X64-NEXT: popq %rbp ; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0 ; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0 ; X64-NEXT: leaq -16(%rbp), %rsp diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll new file mode 100644 index 00000000000000..2c39560f02d160 --- /dev/null +++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll @@ -0,0 +1,118 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-windows-gnu" + +; This function uses esi as base pointer, the inline asm clobbers esi, so we +; should save esi using esp before the inline asm, and restore esi after the +; inline asm. 
+ +define i32 @clober_bp() { +; CHECK-LABEL: clober_bp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: andl $-16, %esp +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: movl %esp, %esi +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: movl $4, 12(%esi) +; CHECK-NEXT: movl 12(%esi), %eax +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: andl $-4, %eax +; CHECK-NEXT: calll __alloca +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: movl %eax, %esp +; CHECK-NEXT: movl $1, (%eax) +; CHECK-NEXT: leal 8(%esi), %edi +; CHECK-NEXT: movl $4, %ecx +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: #APP +; CHECK-NEXT: rep movsb (%esi), %es:(%edi) +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: popl %esi +; CHECK-NEXT: movl 8(%esi), %eax +; CHECK-NEXT: leal -8(%ebp), %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +entry: + %size = alloca i32, align 4 + %g = alloca i32, align 4 + store volatile i32 4, ptr %size, align 4 + %len = load volatile i32, ptr %size, align 4 + %var_array = alloca i8, i32 %len, align 16 + store i32 1, ptr %var_array, align 16 + %nil = call { ptr, ptr, i32 } asm "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr %g, ptr %var_array, i32 4) + %retval = load i32, ptr %g, align 4 + ret i32 %retval +} + +; This function has the same code except the inline asm also clobbers +; frame pointer. 
+ +define i32 @clobber_bpfp() { +; CHECK-LABEL: clobber_bpfp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: andl $-16, %esp +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: movl %esp, %esi +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: movl $4, 12(%esi) +; CHECK-NEXT: movl 12(%esi), %eax +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: andl $-4, %eax +; CHECK-NEXT: calll __alloca +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: movl %eax, %esp +; CHECK-NEXT: movl $1, (%eax) +; CHECK-NEXT: leal 8(%esi), %edi +; CHECK-NEXT: movl $4, %ecx +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x74, 0x04, 0x06, 0x11, 0x08, 0x22 # +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: #APP +; CHECK-NEXT: rep movsb (%esi), %es:(%edi) +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: movl 8(%esi), %eax +; CHECK-NEXT: leal -8(%ebp), %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +entry: + %size = alloca i32, align 4 + %g = alloca i32, align 4 + store volatile i32 4, ptr %size, align 4 + %len = load volatile i32, ptr %size, align 4 + %var_array = alloca i8, i32 %len, align 16 + store i32 1, ptr %var_array, align 16 + %nil = call { ptr, ptr, i32 } asm "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags},~{ebp}"(ptr %g, ptr %var_array, i32 4) + %retval = load i32, ptr %g, align 4 + ret i32 %retval +} + diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll new file mode 100644 index 00000000000000..6209e1a85e9e19 --- /dev/null +++ 
b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s + +; Calling convention ghccc uses ebp to pass parameter, so calling a function +; using ghccc clobbers ebp. We should save and restore ebp around such a call +; if ebp is used as frame pointer. + +declare ghccc i32 @external(i32) + +; Basic test with ghccc calling convention. +define i32 @test1(i32 %0, i32 %1) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: andq $-16, %rsp +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 # +; CHECK-NEXT: movl %esi, %ebp +; CHECK-NEXT: movq %rdi, %r13 +; CHECK-NEXT: callq external@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: leaq -40(%rbp), %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + %x = call ghccc i32 @external(i32 %0, i32 %1) + ret i32 %x +} + +; Calling convention hipe has similar behavior. It clobbers rbp but not rbx. 
+ +declare cc 11 i64 @hipe1(i64) +declare cc 11 i64 @hipe2(i64, i64, i64, i64, i64, i64, i64) + +; Basic test with hipe calling convention. +define i64 @test2(i64 %a0, i64 %a1) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: andq $-16, %rsp +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 # +; CHECK-NEXT: movq %rsi, %rbp +; CHECK-NEXT: movq %rdi, %r15 +; CHECK-NEXT: callq hipe1@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: movq %r15, %rax +; CHECK-NEXT: leaq -40(%rbp), %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + %x = call cc 11 i64 @hipe1(i64 %a0, i64 %a1) + ret i64 %x +} + +; Test with more arguments, so some of them are passed from stack. The spilling +; of rbp should not disturb stack arguments. +; fixme: current generated code is wrong because rbp is used to load passed in +; argument after rbp is assigned argument for function call, it is caused +; by x86-cf-opt. 
+define i64 @test3(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: andq $-16, %rsp +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 # +; CHECK-NEXT: movq %rsi, %rbp +; CHECK-NEXT: movq %rdi, %r15 +; CHECK-NEXT: movq %rdx, %rsi +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: movq %r9, %r8 +; CHECK-NEXT: pushq 24(%rbp) +; CHECK-NEXT: pushq 16(%rbp) +; CHECK-NEXT: callq hipe2@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: addq $16, %rsp +; CHECK-NEXT: movq %r15, %rax +; CHECK-NEXT: leaq -40(%rbp), %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + %x = call cc 11 i64 @hipe2(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) + ret i64 %x +} diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll new file mode 100644 index 00000000000000..25c951d8b1a109 --- /dev/null +++ b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s 
| FileCheck %s + +target triple = "x86_64-linux-gnux32" + +define i32 @foo() { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: movl $4, -8(%rbp) +; CHECK-NEXT: movl $5, -4(%rbp) +; CHECK-NEXT: movl -8(%rbp), %eax +; CHECK-NEXT: movq %rsp, %rcx +; CHECK-NEXT: addq $15, %rax +; CHECK-NEXT: andq $-16, %rax +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: subq %rax, %rdx +; CHECK-NEXT: movq %rdx, %rsp +; CHECK-NEXT: negq %rax +; CHECK-NEXT: movl $1, (%rcx,%rax) +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 # +; CHECK-NEXT: movl $123, %ebp +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: movl -4(%rbp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq +entry: + %size = alloca i32, align 4 + %g = alloca i32, align 4 + store volatile i32 4, ptr %size, align 4 + store volatile i32 5, ptr %g, align 4 + %len = load volatile i32, ptr %size, align 4 + %var_array = alloca i8, i32 %len, align 16 + store i32 1, ptr %var_array, align 16 + call void asm "nop", "{ebp},~{memory}"(i32 123) + %retval = load i32, ptr %g, align 4 + ret i32 %retval +} diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll index 08e4bde7353a42..777eb838b84cc7 100644 --- a/llvm/test/CodeGen/X86/i386-baseptr.ll +++ b/llvm/test/CodeGen/X86/i386-baseptr.ll @@ -109,10 +109,14 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; CHECK-NEXT: subl %eax, %edx ; CHECK-NEXT: movl %edx, %esp ; CHECK-NEXT: negl %eax +; CHECK-NEXT: pushl %esi +; 
CHECK-NEXT: subl $28, %esp ; CHECK-NEXT: movl $405, %esi # imm = 0x195 ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addl $28, %esp +; CHECK-NEXT: popl %esi ; CHECK-NEXT: movl $405, %ebx # imm = 0x195 ; CHECK-NEXT: #APP ; CHECK-NEXT: nop diff --git a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll index 3c98eead8d18f5..d3ca872509ad5a 100644 --- a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll +++ b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll @@ -37,6 +37,8 @@ define void @func() local_unnamed_addr #0 { ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx ; CHECK-NEXT: calll static_func +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: #APP ; CHECK-EMPTY: ; CHECK-NEXT: calll static_func @@ -52,6 +54,8 @@ define void @func() local_unnamed_addr #0 { ; CHECK-NEXT: shrl $0, %esp ; CHECK-EMPTY: ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: popl %ebp entry: %call = tail call i32 @static_func() ;; We test call, CALL, and jmp. 
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll index 3c3944c2082bd5..a0f937e2c323b6 100644 --- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll +++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll @@ -108,8 +108,10 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: andl $-16, %esp ; CHECK-NEXT: cld +; CHECK-NEXT: pushl %ebp ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: popl %ebp ; CHECK-NEXT: leal -12(%ebp), %esp ; CHECK-NEXT: popl %eax ; CHECK-NEXT: popl %ebx @@ -127,8 +129,10 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr ; CHECK0-NEXT: pushl %eax ; CHECK0-NEXT: andl $-16, %esp ; CHECK0-NEXT: cld +; CHECK0-NEXT: pushl %ebp ; CHECK0-NEXT: #APP ; CHECK0-NEXT: #NO_APP +; CHECK0-NEXT: popl %ebp ; CHECK0-NEXT: leal -12(%ebp), %esp ; CHECK0-NEXT: popl %eax ; CHECK0-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll index 8cda4ba2814bba..020004def6e7ad 100644 --- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll +++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll @@ -136,10 +136,14 @@ define void @clobber_base() #0 { ; X32ABI-NEXT: subl %eax, %edx ; X32ABI-NEXT: negl %eax ; X32ABI-NEXT: movl %edx, %esp +; X32ABI-NEXT: pushq %rbx +; X32ABI-NEXT: subl $24, %esp ; X32ABI-NEXT: movl $405, %ebx # imm = 0x195 ; X32ABI-NEXT: #APP ; X32ABI-NEXT: nop ; X32ABI-NEXT: #NO_APP +; X32ABI-NEXT: addl $24, %esp +; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: movl $8, %edx ; X32ABI-NEXT: #APP ; X32ABI-NEXT: movl %edx, (%ebx) @@ -268,6 +272,8 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; X32ABI-NEXT: subl %eax, %edx ; X32ABI-NEXT: negl %eax ; X32ABI-NEXT: movl %edx, %esp +; X32ABI-NEXT: pushq %rbx +; X32ABI-NEXT: subl $24, %esp ; X32ABI-NEXT: movl $405, %ebx # imm = 0x195 ; X32ABI-NEXT: #APP ; X32ABI-NEXT: nop @@ -275,6 +281,8 @@ define x86_regcallcc void 
@clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; X32ABI-NEXT: #APP ; X32ABI-NEXT: nop ; X32ABI-NEXT: #NO_APP +; X32ABI-NEXT: addl $24, %esp +; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: movl $8, %edx ; X32ABI-NEXT: #APP ; X32ABI-NEXT: movl %edx, (%ebx) @@ -385,10 +393,14 @@ define void @vmw_host_printf(ptr %fmt, ...) nounwind { ; X32ABI-NEXT: movl $48, (%eax) ; X32ABI-NEXT: movl $8, (%eax) ; X32ABI-NEXT: xorl %eax, %eax +; X32ABI-NEXT: pushq %rbx +; X32ABI-NEXT: subl $24, %esp ; X32ABI-NEXT: xorl %ebx, %ebx ; X32ABI-NEXT: xorl %ecx, %ecx ; X32ABI-NEXT: #APP ; X32ABI-NEXT: #NO_APP +; X32ABI-NEXT: addl $24, %esp +; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: leal -8(%ebp), %esp ; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll index 47aefdbf0e466e..b4c18dd7f4573e 100644 --- a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll +++ b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll @@ -94,6 +94,8 @@ define i64 @read_flags_reg_pressure() nounwind { ; WIN64-NEXT: pushq %rbx ; WIN64-NEXT: subq $16, %rsp ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp +; WIN64-NEXT: pushq %rbp +; WIN64-NEXT: pushq %rax ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP ; WIN64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -103,6 +105,8 @@ define i64 @read_flags_reg_pressure() nounwind { ; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP +; WIN64-NEXT: addq $8, %rsp +; WIN64-NEXT: popq %rbp ; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; WIN64-NEXT: addq $16, %rsp ; WIN64-NEXT: popq %rbx @@ -177,6 +181,8 @@ define void @write_flags_reg_pressure(i64 noundef %0) nounwind { ; WIN64-NEXT: subq $16, %rsp ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp ; WIN64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; WIN64-NEXT: pushq %rbp +; WIN64-NEXT: pushq %rax ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP ; WIN64-NEXT: 
movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -186,6 +192,8 @@ define void @write_flags_reg_pressure(i64 noundef %0) nounwind { ; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP +; WIN64-NEXT: popq %rax +; WIN64-NEXT: popq %rbp ; WIN64-NEXT: addq $16, %rsp ; WIN64-NEXT: popq %rbx ; WIN64-NEXT: popq %rdi