diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index d8c9d0a432ad8f..3df9e56db38a43 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -475,6 +475,10 @@ class TargetFrameLowering { /// debug info. virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const; + /// If frame pointer or base pointer is clobbered by an instruction, we should + /// spill/restore it around that instruction. + virtual void spillFPBP(MachineFunction &MF) const {} + /// This method is called at the end of prolog/epilog code insertion, so /// targets can emit remarks based on the final frame layout. virtual void emitRemarks(const MachineFunction &MF, diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index 06de92515c0444..f5bedc7b8ecdfc 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" using namespace llvm; @@ -184,6 +185,10 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { unsigned NumRegs = TRI.getNumSupportedRegs(*MF); BitVector CSRSaved(NumRegs), CSRRestored(NumRegs); +#ifndef NDEBUG + int RememberState = 0; +#endif + // Determine cfa offset and register set by the block. for (MachineInstr &MI : *MBBInfo.MBB) { if (MI.isCFIInstruction()) { @@ -228,17 +233,25 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { case MCCFIInstruction::OpRememberState: // TODO: Add support for handling cfi_remember_state. #ifndef NDEBUG - report_fatal_error( - "Support for cfi_remember_state not implemented! 
Value of CFA " - "may be incorrect!\n"); + // Currently we need cfi_remember_state and cfi_restore_state to be in + // the same BB, so it will not impact outgoing CFA. + ++RememberState; + if (RememberState != 1) + MF->getContext().reportError( + SMLoc(), + "Support for cfi_remember_state not implemented! Value of CFA " + "may be incorrect!\n"); #endif break; case MCCFIInstruction::OpRestoreState: // TODO: Add support for handling cfi_restore_state. #ifndef NDEBUG - report_fatal_error( - "Support for cfi_restore_state not implemented! Value of CFA may " - "be incorrect!\n"); + --RememberState; + if (RememberState != 0) + MF->getContext().reportError( + SMLoc(), + "Support for cfi_restore_state not implemented! Value of CFA may " + "be incorrect!\n"); #endif break; // Other CFI directives do not affect CFA value. @@ -264,6 +277,14 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { } } +#ifndef NDEBUG + if (RememberState != 0) + MF->getContext().reportError( + SMLoc(), + "Support for cfi_remember_state not implemented! Value of CFA may be " + "incorrect!\n"); +#endif + MBBInfo.Processed = true; // Update outgoing CFA info. diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index f4490873cfdcdb..ee03eaa8ae527c 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -228,6 +228,11 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) { FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF); ORE = &getAnalysis().getORE(); + // Spill frame pointer and/or base pointer registers if they are clobbered. + // It is placed before call frame instruction elimination so it will not mess + // with stack arguments. + TFI->spillFPBP(MF); + // Calculate the MaxCallFrameSize value for the function's frame // information. Also eliminates call frame pseudo instructions. 
calculateCallFrameInfo(MF);
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index bdc9a0d29670a1..77dac1197f85e9 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -4227,3 +4227,360 @@ void X86FrameLowering::restoreWinEHStackPointersInParent(
                                   /*RestoreSP=*/IsSEH);
   }
 }
+
+// Compute the number of padding bytes needed below the spilled FP/BP so that
+// SP is aligned to the stack alignment again. Only the \p NumSpilledRegs
+// pushed slots of class \p RC are taken into account, i.e. SP is presumed to
+// be aligned before the spill.
+static int computeFPBPAlignmentGap(MachineFunction &MF,
+                                   const TargetRegisterClass *RC,
+                                   unsigned NumSpilledRegs) {
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
+  Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
+  unsigned AlignedSize = alignTo(AllocSize, StackAlign);
+  return AlignedSize - AllocSize;
+}
+
+/// Spill \p FP and/or \p BP (whichever is valid) with push instructions
+/// inserted before \p BeforeMI, then lower SP by \p SPAdjust more bytes. If FP
+/// is spilled and DWARF CFI is needed, also emit .cfi_remember_state and a
+/// DW_CFA_def_cfa_expression escape that recomputes the CFA from the FP value
+/// saved on the stack.
+void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
+                                        MachineBasicBlock::iterator BeforeMI,
+                                        Register FP, Register BP,
+                                        int SPAdjust) const {
+  assert(FP.isValid() || BP.isValid());
+
+  MachineBasicBlock *MBB = BeforeMI->getParent();
+  DebugLoc DL = BeforeMI->getDebugLoc();
+
+  // Spill FP.
+  if (FP.isValid()) {
+    BuildMI(*MBB, BeforeMI, DL,
+            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
+        .addReg(FP);
+  }
+
+  // Spill BP.
+  if (BP.isValid()) {
+    BuildMI(*MBB, BeforeMI, DL,
+            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
+        .addReg(BP);
+  }
+
+  // Make sure SP is aligned.
+  if (SPAdjust)
+    emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);
+
+  // Emit unwinding information.
+  if (FP.isValid() && needsDwarfCFI(MF)) {
+    // Emit .cfi_remember_state to remember old frame.
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
+    BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex);
+
+    // Setup new CFA value with DW_CFA_def_cfa_expression:
+    //     DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
+    // (shown for x86-64; the register is the DWARF number of SP and the
+    // constant is SlotSize * 2).
+    SmallString<64> CfaExpr;
+    uint8_t buffer[16];
+    // Offset from SP to the slot holding the spilled FP: the alignment
+    // padding plus the BP slot pushed after FP, if any.
+    int Offset = SPAdjust;
+    if (BP.isValid())
+      Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
+    // If BeforeMI is a frame setup instruction, we need to adjust the position
+    // and offset of the new cfi instruction.
+    if (TII.isFrameSetup(*BeforeMI)) {
+      Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
+      BeforeMI = std::next(BeforeMI);
+    }
+    Register StackPtr = TRI->getStackRegister();
+    if (STI.isTarget64BitILP32())
+      StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
+    unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
+    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
+    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+    CfaExpr.push_back(dwarf::DW_OP_deref);
+    CfaExpr.push_back(dwarf::DW_OP_consts);
+    CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
+    CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);
+
+    SmallString<64> DefCfaExpr;
+    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
+    DefCfaExpr.append(CfaExpr.str());
+    BuildCFI(*MBB, BeforeMI, DL,
+             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
+             MachineInstr::FrameSetup);
+  }
+}
+
+/// Undo spillFPBPUsingSP: after \p AfterMI, raise SP by \p SPAdjust bytes and
+/// pop \p BP and/or \p FP (whichever is valid) in the reverse order of the
+/// spill. If FP is restored and DWARF CFI is needed, emit .cfi_restore_state.
+void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
+                                          MachineBasicBlock::iterator AfterMI,
+                                          Register FP, Register BP,
+                                          int SPAdjust) const {
+  assert(FP.isValid() || BP.isValid());
+
+  // Adjust SP so it points to spilled FP or BP.
+  MachineBasicBlock *MBB = AfterMI->getParent();
+  MachineBasicBlock::iterator Pos = std::next(AfterMI);
+  DebugLoc DL = AfterMI->getDebugLoc();
+  if (SPAdjust)
+    emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);
+
+  // Restore BP.
+  if (BP.isValid()) {
+    BuildMI(*MBB, Pos, DL,
+            TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
+  }
+
+  // Restore FP.
+  if (FP.isValid()) {
+    BuildMI(*MBB, Pos, DL,
+            TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);
+
+    // Emit unwinding information.
+    if (needsDwarfCFI(MF)) {
+      // Restore original frame with .cfi_restore_state.
+      unsigned CFIIndex =
+          MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
+      BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+    }
+  }
+}
+
+/// Spill the frame pointer (\p SpillFP) and/or base pointer (\p SpillBP)
+/// before \p BeforeMI and restore them after \p AfterMI. On 64-bit ILP32
+/// targets the 64-bit super-registers are used.
+void X86FrameLowering::saveAndRestoreFPBPUsingSP(
+    MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
+    MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
+  assert(SpillFP || SpillBP);
+
+  Register FP, BP;
+  // Initialized so that builds without assertions never read an
+  // indeterminate pointer; one of the branches below sets it whenever the
+  // precondition asserted above holds.
+  const TargetRegisterClass *RC = nullptr;
+  unsigned NumRegs = 0;
+
+  if (SpillFP) {
+    FP = TRI->getFrameRegister(MF);
+    if (STI.isTarget64BitILP32())
+      FP = Register(getX86SubSuperRegister(FP, 64));
+    RC = TRI->getMinimalPhysRegClass(FP);
+    ++NumRegs;
+  }
+  if (SpillBP) {
+    BP = TRI->getBaseRegister();
+    if (STI.isTarget64BitILP32())
+      BP = Register(getX86SubSuperRegister(BP, 64));
+    RC = TRI->getMinimalPhysRegClass(BP);
+    ++NumRegs;
+  }
+  int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);
+
+  spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
+  restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
+}
+
+/// Return true if \p MI belongs to a sequence that already saves and restores
+/// the register itself and must not be spilled again. In that case \p MI is
+/// advanced (toward the start of the block) to the instruction that saves
+/// RBX, so the caller skips the whole sequence.
+bool X86FrameLowering::skipSpillFPBP(
+    MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
+  if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
+    // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
+    //     SaveRbx = COPY RBX
+    //     SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
+    // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
+    // We should skip this instruction sequence.
+    // NOTE(review): the scan has no end-of-block check; it relies on the
+    // COPY from RBX (or a store of RBX to a stack slot) preceding the pseudo
+    // in the same block.
+    int FI;
+    unsigned Reg;
+    while (!(MI->getOpcode() == TargetOpcode::COPY &&
+             MI->getOperand(1).getReg() == X86::RBX) &&
+           !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
+      ++MI;
+    return true;
+  }
+  return false;
+}
+
+/// Check whether \p MI uses or defines \p FP and/or \p BP. \p AccessFP and
+/// \p AccessBP are always overwritten with the per-register result; an
+/// invalid (unset) register never counts as accessed. Returns true if either
+/// register is accessed.
+static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
+                         const TargetRegisterInfo *TRI, bool &AccessFP,
+                         bool &AccessBP) {
+  AccessFP = AccessBP = false;
+  if (FP) {
+    if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
+        MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
+      AccessFP = true;
+  }
+  if (BP) {
+    if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
+        MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
+      AccessBP = true;
+  }
+  return AccessFP || AccessBP;
+}
+
+// Invoke instruction has been lowered to normal function call. We try to figure
+// out if MI comes from Invoke.
+// A call is treated as an invoke if it lies between a pair of EH labels, or if
+// it is the last call of a block that has an EH pad successor.
+// Do we have any better method?
+static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
+  if (!MI.isCall())
+    return false;
+  if (InsideEHLabels)
+    return true;
+
+  const MachineBasicBlock *MBB = MI.getParent();
+  if (!MBB->hasEHPadSuccessor())
+    return false;
+
+  // Check if there is another call instruction from MI to the end of MBB.
+  MachineBasicBlock::const_iterator MBBI = MI, ME = MBB->end();
+  for (++MBBI; MBBI != ME; ++MBBI)
+    if (MBBI->isCall())
+      return false;
+  return true;
+}
+
+/// If a function uses base pointer and the base pointer is clobbered by inline
+/// asm, RA doesn't detect this case, and after the inline asm, the base pointer
+/// contains garbage value.
+/// For example if a 32b x86 function uses base pointer esi, and esi is
+/// clobbered by following inline asm
+///     asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
+/// We need to save esi before the asm and restore it after the asm.
+///
+/// The problem can also occur to frame pointer if there is a function call, and
+/// the callee uses a different calling convention and clobbers the fp.
+///
+/// Because normal frame objects (spill slots) are accessed through fp/bp
+/// register, so we can't spill fp/bp to normal spill slots.
+///
+/// FIXME: There are 2 possible enhancements:
+/// 1. In many cases there are different physical registers not clobbered by
+/// inline asm, we can use one of them as base pointer. Or use a virtual
+/// register as base pointer and let RA allocate a physical register to it.
+/// 2. If there is no other instructions access stack with fp/bp from the
+/// inline asm to the epilog, and no cfi requirement for a correct fp, we can
+/// skip the save and restore operations.
+void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
+  Register FP, BP;
+  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
+  if (TFI.hasFP(MF))
+    FP = TRI->getFrameRegister(MF);
+  if (TRI->hasBasePointer(MF))
+    BP = TRI->getBaseRegister();
+  if (!FP && !BP)
+    return;
+
+  for (MachineBasicBlock &MBB : MF) {
+    bool InsideEHLabels = false;
+    auto MI = MBB.rbegin(), ME = MBB.rend();
+    auto TermMI = MBB.getFirstTerminator();
+    // Scan backward starting from the instruction before the first
+    // terminator. If there is no such instruction there is nothing to do;
+    // terminators themselves must not be handled because the restore code
+    // would otherwise be placed after a terminator.
+    if (TermMI == MBB.begin())
+      continue;
+    MI = *(std::prev(TermMI));
+
+    while (MI != ME) {
+      // Skip frame setup/destroy instructions.
+      // Skip Invoke (call inside try block) instructions.
+      // Skip instructions handled by target.
+      if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
+          MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
+          isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
+        ++MI;
+        continue;
+      }
+
+      if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
+        InsideEHLabels = !InsideEHLabels;
+        ++MI;
+        continue;
+      }
+
+      bool AccessFP, AccessBP;
+      // Check if fp or bp is used in MI.
+      if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
+        ++MI;
+        continue;
+      }
+
+      // Look for the range [DefMI, KillMI] in which fp or bp is defined and
+      // used.
+      bool FPLive = false, BPLive = false;
+      bool SpillFP = false, SpillBP = false;
+      auto DefMI = MI, KillMI = MI;
+      do {
+        SpillFP |= AccessFP;
+        SpillBP |= AccessBP;
+
+        // Maintain FPLive and BPLive.
+        if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
+          FPLive = false;
+        if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
+          FPLive = true;
+        if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
+          BPLive = false;
+        if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
+          BPLive = true;
+
+        DefMI = MI++;
+      } while ((MI != ME) &&
+               (FPLive || BPLive ||
+                isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));
+
+      // Don't need to save/restore if FP is accessed through llvm.frameaddress.
+      if (FPLive && !SpillBP)
+        continue;
+
+      // If the bp is clobbered by a call, we should save and restore outside of
+      // the frame setup instructions.
+      if (KillMI->isCall() && DefMI != ME) {
+        const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+        auto FrameSetup = std::next(DefMI);
+        // Look for frame setup instruction toward the start of the BB.
+        // If we reach another call instruction, it means no frame setup
+        // instruction for the current call instruction.
+        while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
+               !FrameSetup->isCall())
+          ++FrameSetup;
+        // If a frame setup instruction is found, we need to find out the
+        // corresponding frame destroy instruction.
+        if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup)) {
+          while (!TII.isFrameInstr(*KillMI))
+            --KillMI;
+          DefMI = FrameSetup;
+          MI = DefMI;
+          ++MI;
+        }
+      }
+
+      // Call target function to spill and restore FP and BP registers.
+      saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP);
+    }
+  }
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 2dc9ecc6109d78..e21f6ab3d16d5f 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -103,6 +103,8 @@ class X86FrameLowering : public TargetFrameLowering {
                               MutableArrayRef CSI,
                               const TargetRegisterInfo *TRI) const override;
 
+  void spillFPBP(MachineFunction &MF) const override;
+
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
   bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
@@ -267,6 +269,29 @@ class X86FrameLowering : public TargetFrameLowering {
   void emitCatchRetReturnValue(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                MachineInstr *CatchRet) const;
+
+  /// Issue instructions to allocate stack space and spill frame pointer and/or
+  /// base pointer to stack using stack pointer register.
+  void spillFPBPUsingSP(MachineFunction &MF,
+                        const MachineBasicBlock::iterator BeforeMI, Register FP,
+                        Register BP, int SPAdjust) const;
+
+  /// Issue instructions to restore frame pointer and/or base pointer from stack
+  /// using stack pointer register, and free stack space.
+  void restoreFPBPUsingSP(MachineFunction &MF,
+                          const MachineBasicBlock::iterator AfterMI,
+                          Register FP, Register BP, int SPAdjust) const;
+
+  void saveAndRestoreFPBPUsingSP(MachineFunction &MF,
+                                 MachineBasicBlock::iterator BeforeMI,
+                                 MachineBasicBlock::iterator AfterMI,
+                                 bool SpillFP, bool SpillBP) const;
+
+  // If MI uses fp/bp, but target can handle it, and doesn't want to be spilled
+  // again, this function should return true, and update MI so we will not check
+  // any instructions from related sequence.
+ bool skipSpillFPBP(MachineFunction &MF, + MachineBasicBlock::reverse_iterator &MI) const; }; } // End llvm namespace diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll index aa5c54d30e3bc4..f20c4c1ae27867 100644 --- a/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll +++ b/llvm/test/CodeGen/X86/apx/push2-pop2-vector-register.ll @@ -43,8 +43,12 @@ define void @widget(float %arg) nounwind { ; FRAME-NEXT: xorl %r8d, %r8d ; FRAME-NEXT: callq *%rsi ; FRAME-NEXT: movss %xmm6, 0 +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload ; FRAME-NEXT: addq $48, %rsp ; FRAME-NEXT: pop2 %r15, %rsi diff --git a/llvm/test/CodeGen/X86/apx/push2-pop2.ll b/llvm/test/CodeGen/X86/apx/push2-pop2.ll index 25139f1da8272c..6bd9f525090ee2 100644 --- a/llvm/test/CodeGen/X86/apx/push2-pop2.ll +++ b/llvm/test/CodeGen/X86/apx/push2-pop2.ll @@ -24,8 +24,12 @@ define void @csr1() nounwind { ; FRAME: # %bb.0: # %entry ; FRAME-NEXT: pushq %rbp ; FRAME-NEXT: movq %rsp, %rbp +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: popq %rbp ; FRAME-NEXT: retq entry: @@ -59,8 +63,12 @@ define void @csr2() nounwind { ; FRAME-NEXT: pushq %rbp ; FRAME-NEXT: movq %rsp, %rbp ; FRAME-NEXT: pushq %r15 +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: popq %r15 ; FRAME-NEXT: popq %rbp ; FRAME-NEXT: retq @@ -95,8 +103,12 @@ define void @csr3() nounwind { ; FRAME-NEXT: pushq %rbp ; FRAME-NEXT: movq %rsp, %rbp ; FRAME-NEXT: push2 %r14, %r15 +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: 
popq %rbp ; FRAME-NEXT: pop2 %r15, %r14 ; FRAME-NEXT: popq %rbp ; FRAME-NEXT: retq @@ -136,8 +148,12 @@ define void @csr4() nounwind { ; FRAME-NEXT: movq %rsp, %rbp ; FRAME-NEXT: push2 %r14, %r15 ; FRAME-NEXT: pushq %r13 +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: popq %r13 ; FRAME-NEXT: pop2 %r15, %r14 ; FRAME-NEXT: popq %rbp @@ -178,8 +194,12 @@ define void @csr5() nounwind { ; FRAME-NEXT: movq %rsp, %rbp ; FRAME-NEXT: push2 %r14, %r15 ; FRAME-NEXT: push2 %r12, %r13 +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: pop2 %r13, %r12 ; FRAME-NEXT: pop2 %r15, %r14 ; FRAME-NEXT: popq %rbp @@ -225,8 +245,12 @@ define void @csr6() nounwind { ; FRAME-NEXT: push2 %r14, %r15 ; FRAME-NEXT: push2 %r12, %r13 ; FRAME-NEXT: pushq %rbx +; FRAME-NEXT: pushq %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popq %rbp ; FRAME-NEXT: popq %rbx ; FRAME-NEXT: pop2 %r13, %r12 ; FRAME-NEXT: pop2 %r15, %r14 diff --git a/llvm/test/CodeGen/X86/apx/pushp-popp.ll b/llvm/test/CodeGen/X86/apx/pushp-popp.ll index ad4306fccce669..625e70b07198e8 100644 --- a/llvm/test/CodeGen/X86/apx/pushp-popp.ll +++ b/llvm/test/CodeGen/X86/apx/pushp-popp.ll @@ -18,8 +18,12 @@ define void @csr2() nounwind { ; FRAME-NEXT: pushp %rbp ; FRAME-NEXT: movq %rsp, %rbp ; FRAME-NEXT: pushp %r15 +; FRAME-NEXT: pushp %rbp +; FRAME-NEXT: pushq %rax ; FRAME-NEXT: #APP ; FRAME-NEXT: #NO_APP +; FRAME-NEXT: popq %rax +; FRAME-NEXT: popp %rbp ; FRAME-NEXT: popp %r15 ; FRAME-NEXT: popp %rbp ; FRAME-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll index 25d182afd66e71..78870278eeace9 100644 --- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll +++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll @@ -69,8 
+69,12 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { ; X64-NEXT: andq $-64, %rsp ; X64-NEXT: subq $128, %rsp ; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; X64-NEXT: pushq %rbp +; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rdi ; X64-NEXT: callq _func_float16_ptr +; X64-NEXT: addq $8, %rsp +; X64-NEXT: popq %rbp ; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0 ; X64-NEXT: leaq -16(%rbp), %rsp ; X64-NEXT: popq %r12 @@ -149,8 +153,12 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { ; X64-NEXT: subq $128, %rsp ; X64-NEXT: vmovaps %zmm1, %zmm16 ; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; X64-NEXT: pushq %rbp +; X64-NEXT: pushq %rax ; X64-NEXT: movq %rsp, %rdi ; X64-NEXT: callq _func_float16_ptr +; X64-NEXT: addq $8, %rsp +; X64-NEXT: popq %rbp ; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0 ; X64-NEXT: vaddps (%rsp), %zmm0, %zmm0 ; X64-NEXT: leaq -16(%rbp), %rsp diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll new file mode 100644 index 00000000000000..2c39560f02d160 --- /dev/null +++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll @@ -0,0 +1,118 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-windows-gnu" + +; This function uses esi as base pointer, the inline asm clobbers esi, so we +; should save esi using esp before the inline asm, and restore esi after the +; inline asm. 
+ +define i32 @clober_bp() { +; CHECK-LABEL: clober_bp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: andl $-16, %esp +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: movl %esp, %esi +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: movl $4, 12(%esi) +; CHECK-NEXT: movl 12(%esi), %eax +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: andl $-4, %eax +; CHECK-NEXT: calll __alloca +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: movl %eax, %esp +; CHECK-NEXT: movl $1, (%eax) +; CHECK-NEXT: leal 8(%esi), %edi +; CHECK-NEXT: movl $4, %ecx +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: #APP +; CHECK-NEXT: rep movsb (%esi), %es:(%edi) +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: popl %esi +; CHECK-NEXT: movl 8(%esi), %eax +; CHECK-NEXT: leal -8(%ebp), %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +entry: + %size = alloca i32, align 4 + %g = alloca i32, align 4 + store volatile i32 4, ptr %size, align 4 + %len = load volatile i32, ptr %size, align 4 + %var_array = alloca i8, i32 %len, align 16 + store i32 1, ptr %var_array, align 16 + %nil = call { ptr, ptr, i32 } asm "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr %g, ptr %var_array, i32 4) + %retval = load i32, ptr %g, align 4 + ret i32 %retval +} + +; This function has the same code except the inline asm also clobbers +; frame pointer. 
+ +define i32 @clobber_bpfp() { +; CHECK-LABEL: clobber_bpfp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %ebp, -8 +; CHECK-NEXT: movl %esp, %ebp +; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: andl $-16, %esp +; CHECK-NEXT: subl $16, %esp +; CHECK-NEXT: movl %esp, %esi +; CHECK-NEXT: .cfi_offset %esi, -16 +; CHECK-NEXT: .cfi_offset %edi, -12 +; CHECK-NEXT: movl $4, 12(%esi) +; CHECK-NEXT: movl 12(%esi), %eax +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: andl $-4, %eax +; CHECK-NEXT: calll __alloca +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: andl $-16, %eax +; CHECK-NEXT: movl %eax, %esp +; CHECK-NEXT: movl $1, (%eax) +; CHECK-NEXT: leal 8(%esi), %edi +; CHECK-NEXT: movl $4, %ecx +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x74, 0x04, 0x06, 0x11, 0x08, 0x22 # +; CHECK-NEXT: movl %eax, %esi +; CHECK-NEXT: #APP +; CHECK-NEXT: rep movsb (%esi), %es:(%edi) +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: movl 8(%esi), %eax +; CHECK-NEXT: leal -8(%ebp), %esp +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +entry: + %size = alloca i32, align 4 + %g = alloca i32, align 4 + store volatile i32 4, ptr %size, align 4 + %len = load volatile i32, ptr %size, align 4 + %var_array = alloca i8, i32 %len, align 16 + store i32 1, ptr %var_array, align 16 + %nil = call { ptr, ptr, i32 } asm "rep movsb", "={di},={si},={cx},0,1,2,~{memory},~{dirflag},~{fpsr},~{flags},~{ebp}"(ptr %g, ptr %var_array, i32 4) + %retval = load i32, ptr %g, align 4 + ret i32 %retval +} + diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll new file mode 100644 index 00000000000000..6209e1a85e9e19 --- /dev/null +++ 
b/llvm/test/CodeGen/X86/clobber_frame_ptr.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s + +; Calling convention ghccc uses ebp to pass parameter, so calling a function +; using ghccc clobbers ebp. We should save and restore ebp around such a call +; if ebp is used as frame pointer. + +declare ghccc i32 @external(i32) + +; Basic test with ghccc calling convention. +define i32 @test1(i32 %0, i32 %1) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: andq $-16, %rsp +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 # +; CHECK-NEXT: movl %esi, %ebp +; CHECK-NEXT: movq %rdi, %r13 +; CHECK-NEXT: callq external@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: leaq -40(%rbp), %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + %x = call ghccc i32 @external(i32 %0, i32 %1) + ret i32 %x +} + +; Calling convention hipe has similar behavior. It clobbers rbp but not rbx. 
+ +declare cc 11 i64 @hipe1(i64) +declare cc 11 i64 @hipe2(i64, i64, i64, i64, i64, i64, i64) + +; Basic test with hipe calling convention. +define i64 @test2(i64 %a0, i64 %a1) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: andq $-16, %rsp +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 # +; CHECK-NEXT: movq %rsi, %rbp +; CHECK-NEXT: movq %rdi, %r15 +; CHECK-NEXT: callq hipe1@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: movq %r15, %rax +; CHECK-NEXT: leaq -40(%rbp), %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + %x = call cc 11 i64 @hipe1(i64 %a0, i64 %a1) + ret i64 %x +} + +; Test with more arguments, so some of them are passed from stack. The spilling +; of rbp should not disturb stack arguments. +; fixme: current generated code is wrong because rbp is used to load passed in +; argument after rbp is assigned argument for function call, it is caused +; by x86-cf-opt. 
+define i64 @test3(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: andq $-16, %rsp +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 # +; CHECK-NEXT: movq %rsi, %rbp +; CHECK-NEXT: movq %rdi, %r15 +; CHECK-NEXT: movq %rdx, %rsi +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: movq %r8, %rcx +; CHECK-NEXT: movq %r9, %r8 +; CHECK-NEXT: pushq 24(%rbp) +; CHECK-NEXT: pushq 16(%rbp) +; CHECK-NEXT: callq hipe2@PLT +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: addq $16, %rsp +; CHECK-NEXT: movq %r15, %rax +; CHECK-NEXT: leaq -40(%rbp), %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq + %x = call cc 11 i64 @hipe2(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) + ret i64 %x +} diff --git a/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll new file mode 100644 index 00000000000000..25c951d8b1a109 --- /dev/null +++ b/llvm/test/CodeGen/X86/clobber_frame_ptr_x32.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s 
| FileCheck %s + +target triple = "x86_64-linux-gnux32" + +define i32 @foo() { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: movl $4, -8(%rbp) +; CHECK-NEXT: movl $5, -4(%rbp) +; CHECK-NEXT: movl -8(%rbp), %eax +; CHECK-NEXT: movq %rsp, %rcx +; CHECK-NEXT: addq $15, %rax +; CHECK-NEXT: andq $-16, %rax +; CHECK-NEXT: movq %rcx, %rdx +; CHECK-NEXT: subq %rax, %rdx +; CHECK-NEXT: movq %rdx, %rsp +; CHECK-NEXT: negq %rax +; CHECK-NEXT: movl $1, (%rcx,%rax) +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: .cfi_escape 0x0f, 0x06, 0x77, 0x08, 0x06, 0x11, 0x10, 0x22 # +; CHECK-NEXT: movl $123, %ebp +; CHECK-NEXT: #APP +; CHECK-NEXT: nop +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: movl -4(%rbp), %eax +; CHECK-NEXT: movq %rbp, %rsp +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa %rsp, 8 +; CHECK-NEXT: retq +entry: + %size = alloca i32, align 4 + %g = alloca i32, align 4 + store volatile i32 4, ptr %size, align 4 + store volatile i32 5, ptr %g, align 4 + %len = load volatile i32, ptr %size, align 4 + %var_array = alloca i8, i32 %len, align 16 + store i32 1, ptr %var_array, align 16 + call void asm "nop", "{ebp},~{memory}"(i32 123) + %retval = load i32, ptr %g, align 4 + ret i32 %retval +} diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll index 08e4bde7353a42..777eb838b84cc7 100644 --- a/llvm/test/CodeGen/X86/i386-baseptr.ll +++ b/llvm/test/CodeGen/X86/i386-baseptr.ll @@ -109,10 +109,14 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; CHECK-NEXT: subl %eax, %edx ; CHECK-NEXT: movl %edx, %esp ; CHECK-NEXT: negl %eax +; CHECK-NEXT: pushl %esi +; 
CHECK-NEXT: subl $28, %esp ; CHECK-NEXT: movl $405, %esi # imm = 0x195 ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addl $28, %esp +; CHECK-NEXT: popl %esi ; CHECK-NEXT: movl $405, %ebx # imm = 0x195 ; CHECK-NEXT: #APP ; CHECK-NEXT: nop diff --git a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll index 3c98eead8d18f5..d3ca872509ad5a 100644 --- a/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll +++ b/llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll @@ -37,6 +37,8 @@ define void @func() local_unnamed_addr #0 { ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx ; CHECK-NEXT: calll static_func +; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: #APP ; CHECK-EMPTY: ; CHECK-NEXT: calll static_func @@ -52,6 +54,8 @@ define void @func() local_unnamed_addr #0 { ; CHECK-NEXT: shrl $0, %esp ; CHECK-EMPTY: ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addl $12, %esp +; CHECK-NEXT: popl %ebp entry: %call = tail call i32 @static_func() ;; We test call, CALL, and jmp. 
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll index 3c3944c2082bd5..a0f937e2c323b6 100644 --- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll +++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll @@ -108,8 +108,10 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: andl $-16, %esp ; CHECK-NEXT: cld +; CHECK-NEXT: pushl %ebp ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP +; CHECK-NEXT: popl %ebp ; CHECK-NEXT: leal -12(%ebp), %esp ; CHECK-NEXT: popl %eax ; CHECK-NEXT: popl %ebx @@ -127,8 +129,10 @@ define x86_intrcc void @test_isr_clobbers(ptr byval(%struct.interrupt_frame) %fr ; CHECK0-NEXT: pushl %eax ; CHECK0-NEXT: andl $-16, %esp ; CHECK0-NEXT: cld +; CHECK0-NEXT: pushl %ebp ; CHECK0-NEXT: #APP ; CHECK0-NEXT: #NO_APP +; CHECK0-NEXT: popl %ebp ; CHECK0-NEXT: leal -12(%ebp), %esp ; CHECK0-NEXT: popl %eax ; CHECK0-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll index 8cda4ba2814bba..020004def6e7ad 100644 --- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll +++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll @@ -136,10 +136,14 @@ define void @clobber_base() #0 { ; X32ABI-NEXT: subl %eax, %edx ; X32ABI-NEXT: negl %eax ; X32ABI-NEXT: movl %edx, %esp +; X32ABI-NEXT: pushq %rbx +; X32ABI-NEXT: subl $24, %esp ; X32ABI-NEXT: movl $405, %ebx # imm = 0x195 ; X32ABI-NEXT: #APP ; X32ABI-NEXT: nop ; X32ABI-NEXT: #NO_APP +; X32ABI-NEXT: addl $24, %esp +; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: movl $8, %edx ; X32ABI-NEXT: #APP ; X32ABI-NEXT: movl %edx, (%ebx) @@ -268,6 +272,8 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; X32ABI-NEXT: subl %eax, %edx ; X32ABI-NEXT: negl %eax ; X32ABI-NEXT: movl %edx, %esp +; X32ABI-NEXT: pushq %rbx +; X32ABI-NEXT: subl $24, %esp ; X32ABI-NEXT: movl $405, %ebx # imm = 0x195 ; X32ABI-NEXT: #APP ; X32ABI-NEXT: nop @@ -275,6 +281,8 @@ define x86_regcallcc void 
@clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; X32ABI-NEXT: #APP ; X32ABI-NEXT: nop ; X32ABI-NEXT: #NO_APP +; X32ABI-NEXT: addl $24, %esp +; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: movl $8, %edx ; X32ABI-NEXT: #APP ; X32ABI-NEXT: movl %edx, (%ebx) @@ -385,10 +393,14 @@ define void @vmw_host_printf(ptr %fmt, ...) nounwind { ; X32ABI-NEXT: movl $48, (%eax) ; X32ABI-NEXT: movl $8, (%eax) ; X32ABI-NEXT: xorl %eax, %eax +; X32ABI-NEXT: pushq %rbx +; X32ABI-NEXT: subl $24, %esp ; X32ABI-NEXT: xorl %ebx, %ebx ; X32ABI-NEXT: xorl %ecx, %ecx ; X32ABI-NEXT: #APP ; X32ABI-NEXT: #NO_APP +; X32ABI-NEXT: addl $24, %esp +; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: leal -8(%ebp), %esp ; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll index 47aefdbf0e466e..b4c18dd7f4573e 100644 --- a/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll +++ b/llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll @@ -94,6 +94,8 @@ define i64 @read_flags_reg_pressure() nounwind { ; WIN64-NEXT: pushq %rbx ; WIN64-NEXT: subq $16, %rsp ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp +; WIN64-NEXT: pushq %rbp +; WIN64-NEXT: pushq %rax ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP ; WIN64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -103,6 +105,8 @@ define i64 @read_flags_reg_pressure() nounwind { ; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP +; WIN64-NEXT: addq $8, %rsp +; WIN64-NEXT: popq %rbp ; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; WIN64-NEXT: addq $16, %rsp ; WIN64-NEXT: popq %rbx @@ -177,6 +181,8 @@ define void @write_flags_reg_pressure(i64 noundef %0) nounwind { ; WIN64-NEXT: subq $16, %rsp ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp ; WIN64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; WIN64-NEXT: pushq %rbp +; WIN64-NEXT: pushq %rax ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP ; WIN64-NEXT: 
movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -186,6 +192,8 @@ define void @write_flags_reg_pressure(i64 noundef %0) nounwind { ; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; WIN64-NEXT: #APP ; WIN64-NEXT: #NO_APP +; WIN64-NEXT: popq %rax +; WIN64-NEXT: popq %rbp ; WIN64-NEXT: addq $16, %rsp ; WIN64-NEXT: popq %rbx ; WIN64-NEXT: popq %rdi